Which economic event(s), or conditions, are correlated with real estate price fluctuations since the year 2000?
Tidying Census Population Estimates data
# Population Estimates 2000-2010
pop2000_2010 <- read.csv(("~/Desktop/Weylandt Project (Housing)/Raw Data /Population Estimate 2000_2010.csv")) |>
filter(SEX == 0) |>
filter(AGE == 999) |>
select(-SEX,-AGE,-DIVISION,-STATE,-ESTIMATESBASE2000,-CENSUS2010POP) |>
filter(NAME != 'United States')
glimpse(pop2000_2010)
## Rows: 51
## Columns: 13
## $ REGION <int> 3, 4, 4, 3, 4, 4, 1, 3, 3, 3, 3, 4, 4, 2, 2, 2, 2, 3, …
## $ NAME <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "Californi…
## $ POPESTIMATE2000 <int> 4452173, 627963, 5160586, 2678588, 33987977, 4326921, …
## $ POPESTIMATE2001 <int> 4467634, 633714, 5273477, 2691571, 34479458, 4425687, …
## $ POPESTIMATE2002 <int> 4480089, 642337, 5396255, 2705927, 34871843, 4490406, …
## $ POPESTIMATE2003 <int> 4503491, 648414, 5510364, 2724816, 35253159, 4528732, …
## $ POPESTIMATE2004 <int> 4530729, 659286, 5652404, 2749686, 35574576, 4575013, …
## $ POPESTIMATE2005 <int> 4569805, 666946, 5839077, 2781097, 35827943, 4631888, …
## $ POPESTIMATE2006 <int> 4628981, 675302, 6029141, 2821761, 36021202, 4720423, …
## $ POPESTIMATE2007 <int> 4672840, 680300, 6167681, 2848650, 36250311, 4803868, …
## $ POPESTIMATE2008 <int> 4718206, 687455, 6280362, 2874554, 36604337, 4889730, …
## $ POPESTIMATE2009 <int> 4757938, 698895, 6343154, 2896843, 36961229, 4972195, …
## $ POPESTIMATE2010 <int> 4785298, 713985, 6413737, 2921606, 37349363, 5049071, …
# Population Estimates 2011-2019
pop2011_2019 <- read.csv("~/Desktop/Weylandt Project (Housing)/Raw Data /Population Estimates 2010_2019.csv") |>
select(-SUMLEV,-DIVISION,-STATE,-CENSUS2010POP,-ESTIMATESBASE2010,-POPESTIMATE2010) |>
select(NAME,POPESTIMATE2011:POPESTIMATE2019) |>
filter(NAME != 'United States') |>
filter(NAME != 'Northeast Region') |>
filter(NAME != 'Midwest Region') |>
filter(NAME != 'West Region') |>
filter(NAME != 'South Region') |>
filter(NAME != 'Puerto Rico')
glimpse(pop2011_2019)
## Rows: 51
## Columns: 10
## $ NAME <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "Californi…
## $ POPESTIMATE2011 <int> 4799069, 722128, 6472643, 2940667, 37638369, 5121108, …
## $ POPESTIMATE2012 <int> 4815588, 730443, 6554978, 2952164, 37948800, 5192647, …
## $ POPESTIMATE2013 <int> 4830081, 737068, 6632764, 2959400, 38260787, 5269035, …
## $ POPESTIMATE2014 <int> 4841799, 736283, 6730413, 2967392, 38596972, 5350101, …
## $ POPESTIMATE2015 <int> 4852347, 737498, 6829676, 2978048, 38918045, 5450623, …
## $ POPESTIMATE2016 <int> 4863525, 741456, 6941072, 2989918, 39167117, 5539215, …
## $ POPESTIMATE2017 <int> 4874486, 739700, 7044008, 3001345, 39358497, 5611885, …
## $ POPESTIMATE2018 <int> 4887681, 735139, 7158024, 3009733, 39461588, 5691287, …
## $ POPESTIMATE2019 <int> 4903185, 731545, 7278717, 3017804, 39512223, 5758736, …
# Population Estimates 2020-2023
pop2020_2023 <- read.csv("~/Desktop/Weylandt Project (Housing)/Raw Data /Population Estimate 2020_2023.csv") |>
select(NAME:POPESTIMATE2023) |>
filter(NAME != 'United States') |>
filter(NAME != 'Northeast Region') |>
filter(NAME != 'Midwest Region') |>
filter(NAME != 'West Region') |>
filter(NAME != 'South Region') |>
filter(NAME != 'Puerto Rico') |>
filter(NAME != 'New England') |>
filter(NAME != 'Middle Atlantic') |>
filter(NAME != 'East North Central') |>
filter(NAME != 'West North Central') |>
filter(NAME != 'South Atlantic') |>
filter(NAME != 'East South Central') |>
filter(NAME != 'West South Central') |>
filter(NAME != 'Mountain') |>
filter(NAME != 'Pacific') |>
select(-ESTIMATESBASE2020)
glimpse(pop2020_2023)
## Rows: 51
## Columns: 5
## $ NAME <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "Californi…
## $ POPESTIMATE2020 <int> 5031864, 732964, 7186683, 3014348, 39503200, 5785219, …
## $ POPESTIMATE2021 <int> 5050380, 734923, 7272487, 3028443, 39145060, 5811596, …
## $ POPESTIMATE2022 <int> 5073903, 733276, 7365684, 3046404, 39040616, 5841039, …
## $ POPESTIMATE2023 <int> 5108468, 733406, 7431344, 3067732, 38965193, 5877610, …
# Population Estimates of all states from 2000-2023
pop_estimate <- left_join(pop2000_2010,pop2011_2019,join_by(NAME==NAME)) |>
left_join(pop2020_2023,join_by(NAME==NAME)) |>
rename('STATE'=NAME)
glimpse(pop_estimate)
## Rows: 51
## Columns: 26
## $ REGION <int> 3, 4, 4, 3, 4, 4, 1, 3, 3, 3, 3, 4, 4, 2, 2, 2, 2, 3, …
## $ STATE <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "Californi…
## $ POPESTIMATE2000 <int> 4452173, 627963, 5160586, 2678588, 33987977, 4326921, …
## $ POPESTIMATE2001 <int> 4467634, 633714, 5273477, 2691571, 34479458, 4425687, …
## $ POPESTIMATE2002 <int> 4480089, 642337, 5396255, 2705927, 34871843, 4490406, …
## $ POPESTIMATE2003 <int> 4503491, 648414, 5510364, 2724816, 35253159, 4528732, …
## $ POPESTIMATE2004 <int> 4530729, 659286, 5652404, 2749686, 35574576, 4575013, …
## $ POPESTIMATE2005 <int> 4569805, 666946, 5839077, 2781097, 35827943, 4631888, …
## $ POPESTIMATE2006 <int> 4628981, 675302, 6029141, 2821761, 36021202, 4720423, …
## $ POPESTIMATE2007 <int> 4672840, 680300, 6167681, 2848650, 36250311, 4803868, …
## $ POPESTIMATE2008 <int> 4718206, 687455, 6280362, 2874554, 36604337, 4889730, …
## $ POPESTIMATE2009 <int> 4757938, 698895, 6343154, 2896843, 36961229, 4972195, …
## $ POPESTIMATE2010 <int> 4785298, 713985, 6413737, 2921606, 37349363, 5049071, …
## $ POPESTIMATE2011 <int> 4799069, 722128, 6472643, 2940667, 37638369, 5121108, …
## $ POPESTIMATE2012 <int> 4815588, 730443, 6554978, 2952164, 37948800, 5192647, …
## $ POPESTIMATE2013 <int> 4830081, 737068, 6632764, 2959400, 38260787, 5269035, …
## $ POPESTIMATE2014 <int> 4841799, 736283, 6730413, 2967392, 38596972, 5350101, …
## $ POPESTIMATE2015 <int> 4852347, 737498, 6829676, 2978048, 38918045, 5450623, …
## $ POPESTIMATE2016 <int> 4863525, 741456, 6941072, 2989918, 39167117, 5539215, …
## $ POPESTIMATE2017 <int> 4874486, 739700, 7044008, 3001345, 39358497, 5611885, …
## $ POPESTIMATE2018 <int> 4887681, 735139, 7158024, 3009733, 39461588, 5691287, …
## $ POPESTIMATE2019 <int> 4903185, 731545, 7278717, 3017804, 39512223, 5758736, …
## $ POPESTIMATE2020 <int> 5031864, 732964, 7186683, 3014348, 39503200, 5785219, …
## $ POPESTIMATE2021 <int> 5050380, 734923, 7272487, 3028443, 39145060, 5811596, …
## $ POPESTIMATE2022 <int> 5073903, 733276, 7365684, 3046404, 39040616, 5841039, …
## $ POPESTIMATE2023 <int> 5108468, 733406, 7431344, 3067732, 38965193, 5877610, …
# Separating Region and State to join other data sets
region_state <- pop_estimate |>
select(REGION,STATE)
glimpse(region_state)
## Rows: 51
## Columns: 2
## $ REGION <int> 3, 4, 4, 3, 4, 4, 1, 3, 3, 3, 3, 4, 4, 2, 2, 2, 2, 3, 3, 1, 3, …
## $ STATE <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colo…
# Population Estimates by Region
allRegions_PopEstimates <- pop_estimate |>
select(-STATE) |>
group_by(REGION) |>
summarise_all(mean) |>
pivot_longer(cols = "POPESTIMATE2000":"POPESTIMATE2023",
names_to = c("YearlyPopEstimate"),
values_to = "Population")
allRegions_PopEstimates$Year <- gsub("POPESTIMATE(\\d{4})", "\\1", allRegions_PopEstimates$YearlyPopEstimate)
allRegions_PopEstimates <- allRegions_PopEstimates |> select(-YearlyPopEstimate)
glimpse(allRegions_PopEstimates)
## Rows: 96
## Columns: 3
## $ REGION <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ Population <dbl> 5962922, 5990614, 6015991, 6037161, 6047059, 6050137, 60580…
## $ Year <chr> "2000", "2001", "2002", "2003", "2004", "2005", "2006", "20…
# Convert To CSV
#write.csv(allRegions_PopEstimates, 'allRegions_PopEstimates.csv', row.names=FALSE)
Tidying Zillow Average Home Prices data
# Original Zillow data
Zillow <- read.csv("~/Desktop/Weylandt Project (Housing)/Raw Data /Zillow.csv") |>
select(-RegionID,-SizeRank,-RegionType,-StateName)
Zillow[is.na(Zillow)] <- 0
glimpse(Zillow)
## Rows: 51
## Columns: 291
## $ RegionName <chr> "California", "Texas", "Florida", "New York", "Pennsylvani…
## $ X2000.01.31 <dbl> 190252.33, 110404.23, 105986.02, 150566.75, 96332.77, 1243…
## $ X2000.02.29 <dbl> 190891.82, 110464.23, 106215.60, 151102.92, 96538.81, 1244…
## $ X2000.03.31 <dbl> 191755.30, 110493.33, 106493.89, 151619.07, 96732.44, 1246…
## $ X2000.04.30 <dbl> 193629.64, 110637.75, 107059.58, 152729.15, 97127.03, 1251…
## $ X2000.05.31 <dbl> 195808.29, 110731.28, 107669.62, 153877.70, 97532.38, 1257…
## $ X2000.06.30 <dbl> 198159.04, 110836.15, 108292.06, 155119.60, 97948.96, 1264…
## $ X2000.07.31 <dbl> 200641.60, 110905.06, 108917.46, 156236.06, 98391.34, 1271…
## $ X2000.08.31 <dbl> 203233.53, 111108.94, 109554.40, 157213.33, 98813.64, 1282…
## $ X2000.09.30 <dbl> 205831.47, 111376.30, 110214.58, 158081.72, 99243.93, 1294…
## $ X2000.10.31 <dbl> 208295.16, 111662.78, 110888.53, 159012.53, 99656.59, 1308…
## $ X2000.11.30 <dbl> 210721.43, 111993.36, 111588.88, 160152.84, 100111.84, 131…
## $ X2000.12.31 <dbl> 213096.00, 112360.56, 112311.22, 161404.53, 100582.73, 132…
## $ X2001.01.31 <dbl> 215104.78, 112579.79, 112971.72, 162605.10, 101047.52, 133…
## $ X2001.02.28 <dbl> 217091.04, 112546.66, 113627.92, 163604.28, 101472.50, 134…
## $ X2001.03.31 <dbl> 219326.75, 112327.29, 114313.89, 164505.48, 101899.93, 134…
## $ X2001.04.30 <dbl> 222106.55, 112198.77, 115124.18, 165446.37, 102358.57, 135…
## $ X2001.05.31 <dbl> 224892.10, 112241.92, 116011.70, 166485.49, 102797.49, 136…
## $ X2001.06.30 <dbl> 227422.54, 112372.77, 116938.13, 167632.43, 103244.37, 137…
## $ X2001.07.31 <dbl> 229791.61, 112526.48, 117934.07, 168836.55, 103725.17, 137…
## $ X2001.08.31 <dbl> 232074.24, 112664.54, 118989.69, 170100.51, 104254.80, 139…
## $ X2001.09.30 <dbl> 234289.75, 112809.75, 120084.07, 171336.72, 104790.87, 140…
## $ X2001.10.31 <dbl> 236361.29, 112961.63, 121181.45, 172637.88, 105283.46, 141…
## $ X2001.11.30 <dbl> 238307.38, 113072.60, 122233.89, 173999.96, 105783.66, 142…
## $ X2001.12.31 <dbl> 240127.2, 113162.3, 123266.1, 175446.2, 106256.8, 143577.4…
## $ X2002.01.31 <dbl> 241597.59, 113192.85, 124198.00, 176874.15, 106727.58, 144…
## $ X2002.02.28 <dbl> 243100.85, 113277.65, 125060.05, 178170.49, 107193.83, 144…
## $ X2002.03.31 <dbl> 244860.38, 113490.15, 125902.88, 179269.91, 107679.96, 145…
## $ X2002.04.30 <dbl> 247234.6, 113863.8, 126851.2, 180353.7, 108214.9, 146308.6…
## $ X2002.05.31 <dbl> 250006.8, 114304.1, 127891.7, 181509.0, 108799.5, 147090.0…
## $ X2002.06.30 <dbl> 253082.53, 114797.34, 128979.66, 182850.51, 109443.05, 147…
## $ X2002.07.31 <dbl> 256640.83, 115298.72, 130087.27, 184291.52, 110163.33, 148…
## $ X2002.08.31 <dbl> 260471.23, 115828.55, 131270.97, 185900.57, 110976.26, 149…
## $ X2002.09.30 <dbl> 264341.07, 116295.52, 132442.05, 187558.42, 111819.87, 151…
## $ X2002.10.31 <dbl> 268013.44, 116696.25, 133611.59, 189211.30, 112646.35, 152…
## $ X2002.11.30 <dbl> 271611.37, 117037.32, 134722.64, 190817.39, 113357.31, 153…
## $ X2002.12.31 <dbl> 275159.29, 117327.93, 135926.35, 192480.51, 114033.66, 154…
## $ X2003.01.31 <dbl> 278294.28, 117550.76, 137112.98, 194104.98, 114681.53, 155…
## $ X2003.02.28 <dbl> 281217.07, 117697.84, 138332.42, 195685.41, 115333.20, 155…
## $ X2003.03.31 <dbl> 284313.05, 117864.08, 139582.90, 197371.98, 115956.08, 156…
## $ X2003.04.30 <dbl> 287911.69, 118059.09, 140976.11, 199033.73, 116614.62, 157…
## $ X2003.05.31 <dbl> 291908.22, 118280.33, 142447.73, 200734.29, 117420.70, 158…
## $ X2003.06.30 <dbl> 295604.89, 118503.29, 143907.90, 202420.79, 118394.31, 159…
## $ X2003.07.31 <dbl> 299507.68, 118723.14, 145353.52, 204190.00, 119463.63, 160…
## $ X2003.08.31 <dbl> 303618.14, 118978.73, 146831.61, 206198.58, 120494.95, 161…
## $ X2003.09.30 <dbl> 308184.36, 119210.56, 148366.05, 208373.28, 121506.90, 162…
## $ X2003.10.31 <dbl> 312632.18, 119446.64, 149920.62, 210611.37, 122463.50, 163…
## $ X2003.11.30 <dbl> 316918.02, 119617.06, 151445.62, 212568.53, 123330.40, 165…
## $ X2003.12.31 <dbl> 321062.46, 119761.34, 152904.71, 214323.88, 124102.55, 165…
## $ X2004.01.31 <dbl> 325352.95, 120122.35, 154303.55, 216032.57, 124787.33, 166…
## $ X2004.02.29 <dbl> 330185.30, 120934.99, 155688.80, 217651.87, 125576.10, 167…
## $ X2004.03.31 <dbl> 335697.13, 122221.40, 157206.49, 219009.19, 126368.80, 168…
## $ X2004.04.30 <dbl> 342024.59, 123497.99, 158937.06, 220358.10, 127249.37, 169…
## $ X2004.05.31 <dbl> 349310.12, 124552.91, 160963.08, 221945.75, 128285.77, 170…
## $ X2004.06.30 <dbl> 357440.89, 125229.99, 163261.31, 223904.54, 129509.15, 171…
## $ X2004.07.31 <dbl> 365753.52, 125862.90, 165814.52, 226107.37, 130945.32, 173…
## $ X2004.08.31 <dbl> 373686.71, 126438.60, 168626.25, 228373.52, 132348.59, 174…
## $ X2004.09.30 <dbl> 381199.71, 127088.64, 171481.39, 230759.44, 133749.88, 175…
## $ X2004.10.31 <dbl> 388415.35, 127693.44, 174224.24, 233103.98, 134970.30, 177…
## $ X2004.11.30 <dbl> 394759.12, 128234.50, 176796.84, 235479.60, 136205.00, 178…
## $ X2004.12.31 <dbl> 400888.10, 128707.36, 179418.31, 237750.05, 137410.89, 179…
## $ X2005.01.31 <dbl> 406342.52, 129210.98, 182110.16, 239914.42, 138653.19, 180…
## $ X2005.02.28 <dbl> 412254.3, 129634.7, 184988.7, 242023.1, 139661.0, 182014.5…
## $ X2005.03.31 <dbl> 418468.5, 130012.2, 188216.1, 243963.3, 140626.0, 183146.1…
## $ X2005.04.30 <dbl> 425475.5, 130345.0, 192020.0, 246010.9, 141675.9, 184389.1…
## $ X2005.05.31 <dbl> 432291.5, 130707.0, 196264.5, 248185.3, 142939.7, 185658.5…
## $ X2005.06.30 <dbl> 438594.3, 131102.6, 200865.7, 250432.0, 144401.8, 186938.1…
## $ X2005.07.31 <dbl> 444624.6, 131452.7, 205776.0, 252796.2, 145966.2, 188164.6…
## $ X2005.08.31 <dbl> 450741.6, 131780.3, 210851.0, 255267.8, 147605.5, 189516.8…
## $ X2005.09.30 <dbl> 456886.6, 132108.4, 215825.1, 257862.7, 149145.9, 191067.5…
## $ X2005.10.31 <dbl> 462918.3, 132374.2, 220406.2, 260503.3, 150550.5, 192755.3…
## $ X2005.11.30 <dbl> 467811.4, 132684.9, 224641.6, 262890.4, 151839.2, 194098.9…
## $ X2005.12.31 <dbl> 472142.6, 132938.0, 228610.3, 265269.0, 153048.8, 195244.2…
## $ X2006.01.31 <dbl> 475751.3, 133145.6, 232278.6, 267348.2, 154122.4, 196246.0…
## $ X2006.02.28 <dbl> 478960.9, 133355.1, 235682.0, 269250.1, 155058.2, 197342.6…
## $ X2006.03.31 <dbl> 482018.1, 133609.6, 239218.8, 270820.3, 155936.5, 198463.8…
## $ X2006.04.30 <dbl> 484762.9, 134017.8, 243156.6, 272184.2, 156865.9, 199469.0…
## $ X2006.05.31 <dbl> 487490.5, 134576.0, 246932.9, 273472.5, 157905.9, 200486.7…
## $ X2006.06.30 <dbl> 489377.0, 135194.9, 250094.5, 274803.1, 159044.8, 201223.0…
## $ X2006.07.31 <dbl> 490077.5, 135812.0, 252264.1, 276048.5, 160099.1, 201783.9…
## $ X2006.08.31 <dbl> 490083.7, 136360.2, 253688.3, 277162.8, 160952.1, 202202.8…
## $ X2006.09.30 <dbl> 489033.5, 136943.1, 254285.0, 277882.6, 161546.8, 202538.9…
## $ X2006.10.31 <dbl> 487836.7, 137663.2, 254258.0, 278411.4, 162009.8, 202805.7…
## $ X2006.11.30 <dbl> 486560.6, 138352.9, 253877.0, 278717.3, 162379.8, 203043.3…
## $ X2006.12.31 <dbl> 485453.4, 139037.0, 253114.8, 279042.3, 162692.0, 203364.6…
## $ X2007.01.31 <dbl> 484539.6, 139398.1, 252150.5, 279313.5, 163007.9, 203859.2…
## $ X2007.02.28 <dbl> 483467.6, 139540.3, 251108.2, 279534.5, 163385.4, 204355.1…
## $ X2007.03.31 <dbl> 481934.0, 139522.5, 249910.6, 279827.5, 163859.0, 204751.4…
## $ X2007.04.30 <dbl> 479967.9, 139738.6, 248527.6, 279947.3, 164296.2, 204936.2…
## $ X2007.05.31 <dbl> 476901.3, 140133.9, 246726.7, 280177.0, 164716.6, 204956.4…
## $ X2007.06.30 <dbl> 473369.6, 140647.9, 244475.9, 280212.1, 165036.5, 204837.6…
## $ X2007.07.31 <dbl> 468849.7, 141106.6, 241764.5, 280268.1, 165381.4, 204567.9…
## $ X2007.08.31 <dbl> 464136.5, 141594.1, 238767.8, 280345.6, 165629.0, 204225.3…
## $ X2007.09.30 <dbl> 458839.5, 142074.5, 235699.9, 280351.5, 165753.5, 203680.7…
## $ X2007.10.31 <dbl> 453280.8, 142516.9, 232562.1, 280555.0, 165835.1, 203074.3…
## $ X2007.11.30 <dbl> 447693.1, 142885.5, 229281.1, 280759.9, 165939.4, 202442.8…
## $ X2007.12.31 <dbl> 441703.2, 143225.5, 225953.7, 280871.1, 166021.4, 201693.6…
## $ X2008.01.31 <dbl> 435738.7, 143088.1, 222338.7, 280939.9, 165978.7, 200876.0…
## $ X2008.02.29 <dbl> 428698.7, 142690.7, 218770.8, 280802.5, 165948.6, 199267.0…
## $ X2008.03.31 <dbl> 420288.9, 142105.5, 214960.3, 280645.7, 165883.4, 198512.8…
## $ X2008.04.30 <dbl> 410431.0, 141798.4, 211249.8, 280288.8, 165838.0, 197106.3…
## $ X2008.05.31 <dbl> 399937.8, 141578.4, 207263.4, 279692.0, 165637.4, 196611.5…
## $ X2008.06.30 <dbl> 389747.7, 141381.1, 203386.7, 278877.4, 165317.3, 194983.0…
## $ X2008.07.31 <dbl> 379427.2, 141217.9, 199422.9, 277797.9, 164748.0, 193925.9…
## $ X2008.08.31 <dbl> 369491.6, 140870.1, 195301.9, 276630.4, 164007.0, 191945.2…
## $ X2008.09.30 <dbl> 360312.3, 140367.0, 191110.8, 275325.4, 163225.1, 190261.0…
## $ X2008.10.31 <dbl> 352374.5, 139768.5, 186871.7, 273757.2, 162546.4, 187975.5…
## $ X2008.11.30 <dbl> 344679.0, 139181.3, 182703.6, 272040.3, 161907.6, 185834.3…
## $ X2008.12.31 <dbl> 337295.4, 138638.3, 178718.4, 270394.1, 161219.0, 183319.7…
## $ X2009.01.31 <dbl> 330393.3, 138144.3, 174970.2, 268871.3, 160516.6, 180608.5…
## $ X2009.02.28 <dbl> 325367.0, 137884.9, 171639.7, 267561.0, 160055.1, 178448.0…
## $ X2009.03.31 <dbl> 321204.0, 137778.4, 168508.7, 266399.4, 159827.2, 176341.8…
## $ X2009.04.30 <dbl> 317292.2, 137762.0, 165391.1, 265410.2, 159689.8, 175010.0…
## $ X2009.05.31 <dbl> 313501.5, 137761.8, 162375.1, 264501.0, 159451.0, 173431.5…
## $ X2009.06.30 <dbl> 310188.7, 137737.9, 159506.1, 263513.6, 159123.4, 172172.0…
## $ X2009.07.31 <dbl> 307731.9, 137748.7, 157051.3, 262490.3, 158765.0, 170935.7…
## $ X2009.08.31 <dbl> 305678.6, 137633.4, 154789.6, 261385.7, 158343.5, 169815.6…
## $ X2009.09.30 <dbl> 304070.2, 137481.2, 152718.9, 260455.3, 157945.2, 168286.6…
## $ X2009.10.31 <dbl> 303212.8, 137279.2, 150967.0, 259735.4, 157673.7, 166798.1…
## $ X2009.11.30 <dbl> 304057.5, 137321.6, 149799.0, 259275.3, 157618.6, 165343.5…
## $ X2009.12.31 <dbl> 305990.8, 137445.5, 148911.3, 258966.7, 157704.1, 164600.2…
## $ X2010.01.31 <dbl> 307954.36, 137505.72, 148227.96, 258741.54, 157947.67, 163…
## $ X2010.02.28 <dbl> 308774.67, 137492.24, 147632.90, 258723.21, 158287.22, 163…
## $ X2010.03.31 <dbl> 309822.77, 137517.05, 147227.55, 258801.00, 158759.16, 162…
## $ X2010.04.30 <dbl> 311128.19, 137690.46, 146873.64, 259191.12, 159217.80, 162…
## $ X2010.05.31 <dbl> 312565.53, 137890.72, 146414.76, 259608.33, 159518.72, 162…
## $ X2010.06.30 <dbl> 312499.69, 137936.00, 145722.79, 260166.34, 159571.76, 162…
## $ X2010.07.31 <dbl> 311346.90, 137691.54, 144763.80, 260523.49, 159328.61, 161…
## $ X2010.08.31 <dbl> 309377.75, 137219.39, 143566.24, 260437.15, 158866.71, 160…
## $ X2010.09.30 <dbl> 307368.59, 136660.12, 142258.93, 259897.09, 158178.67, 158…
## $ X2010.10.31 <dbl> 305213.04, 136097.40, 140891.75, 258921.09, 157366.24, 157…
## $ X2010.11.30 <dbl> 303303.70, 135513.63, 139507.23, 257899.82, 156472.95, 155…
## $ X2010.12.31 <dbl> 301943.75, 135006.55, 138187.50, 256864.99, 155629.91, 154…
## $ X2011.01.31 <dbl> 300884.79, 134565.93, 136868.70, 256134.99, 154897.02, 152…
## $ X2011.02.28 <dbl> 299870.25, 134102.34, 135618.16, 255574.97, 154303.17, 151…
## $ X2011.03.31 <dbl> 298375.69, 133646.77, 134340.95, 255230.39, 153850.45, 150…
## $ X2011.04.30 <dbl> 296784.89, 133205.94, 133105.25, 254767.25, 153366.58, 149…
## $ X2011.05.31 <dbl> 294394.87, 132856.83, 131871.11, 254270.70, 152806.97, 148…
## $ X2011.06.30 <dbl> 292471.27, 132501.02, 130755.53, 253530.70, 152092.52, 147…
## $ X2011.07.31 <dbl> 290796.22, 132303.55, 129727.48, 252806.75, 151379.14, 146…
## $ X2011.08.31 <dbl> 288969.17, 132187.39, 128918.63, 252099.75, 150786.77, 145…
## $ X2011.09.30 <dbl> 286788.2, 132145.8, 128310.6, 251454.4, 150361.8, 145095.9…
## $ X2011.10.31 <dbl> 284459.72, 132034.37, 127962.04, 250811.88, 150013.15, 143…
## $ X2011.11.30 <dbl> 283309.96, 131826.06, 127783.86, 250112.42, 149664.21, 142…
## $ X2011.12.31 <dbl> 282560.62, 131582.74, 127702.65, 249520.94, 149324.32, 141…
## $ X2012.01.31 <dbl> 282174.35, 131471.32, 127730.85, 249000.51, 149028.89, 140…
## $ X2012.02.29 <dbl> 281612.98, 131624.41, 127850.87, 248793.90, 148963.62, 139…
## $ X2012.03.31 <dbl> 280982.04, 131897.30, 128042.87, 248982.27, 149278.18, 138…
## $ X2012.04.30 <dbl> 280551.19, 132171.80, 128400.70, 249375.38, 149742.98, 138…
## $ X2012.05.31 <dbl> 280747.24, 132478.21, 128863.97, 249865.72, 150186.67, 137…
## $ X2012.06.30 <dbl> 281856.82, 132909.96, 129491.70, 250225.12, 150365.78, 137…
## $ X2012.07.31 <dbl> 283465.90, 133399.29, 130121.35, 250558.77, 150440.25, 138…
## $ X2012.08.31 <dbl> 286010.20, 133764.67, 130681.24, 250570.15, 150319.22, 138…
## $ X2012.09.30 <dbl> 289446.23, 134077.93, 131276.86, 250597.05, 150199.69, 138…
## $ X2012.10.31 <dbl> 293831.14, 134390.14, 132015.33, 250436.28, 150024.31, 138…
## $ X2012.11.30 <dbl> 298632.6, 134750.5, 133061.1, 250421.2, 149895.4, 137892.9…
## $ X2012.12.31 <dbl> 303742.95, 135112.13, 134454.81, 250395.57, 149809.93, 137…
## $ X2013.01.31 <dbl> 308583.76, 135617.03, 136048.79, 250796.57, 149804.20, 137…
## $ X2013.02.28 <dbl> 312867.7, 136258.6, 137676.1, 251463.5, 149824.9, 138131.8…
## $ X2013.03.31 <dbl> 316547.55, 137019.04, 139336.64, 252248.87, 150052.34, 138…
## $ X2013.04.30 <dbl> 320879.4, 137740.9, 141047.0, 252990.5, 150477.9, 139959.0…
## $ X2013.05.31 <dbl> 326796.9, 138564.8, 142976.1, 253830.2, 151107.6, 140865.5…
## $ X2013.06.30 <dbl> 333957.4, 139548.0, 144866.4, 254996.8, 151638.9, 141977.2…
## $ X2013.07.31 <dbl> 341130.4, 140491.5, 146682.3, 256159.3, 151795.5, 142884.2…
## $ X2013.08.31 <dbl> 347991.2, 141406.2, 148416.0, 257203.3, 151850.2, 144077.0…
## $ X2013.09.30 <dbl> 354714.7, 142120.2, 150100.8, 257958.1, 151781.6, 145162.1…
## $ X2013.10.31 <dbl> 361355.1, 142934.3, 151879.7, 258759.3, 151994.2, 146547.9…
## $ X2013.11.30 <dbl> 366925.2, 143679.9, 153450.8, 259423.2, 152146.8, 147746.2…
## $ X2013.12.31 <dbl> 371412.0, 144516.5, 155053.8, 259871.6, 152354.3, 148877.7…
## $ X2014.01.31 <dbl> 374659.4, 145498.9, 156579.6, 260256.9, 152491.2, 149701.2…
## $ X2014.02.28 <dbl> 377344.3, 146607.9, 158223.9, 260784.8, 152587.1, 150591.5…
## $ X2014.03.31 <dbl> 379786.0, 147726.7, 159766.0, 261754.0, 152682.6, 151366.3…
## $ X2014.04.30 <dbl> 382763.7, 148699.3, 161139.3, 262882.2, 152921.7, 152157.7…
## $ X2014.05.31 <dbl> 385941.4, 149684.4, 162493.7, 264223.4, 153358.6, 152974.6…
## $ X2014.06.30 <dbl> 388558.2, 150684.6, 163616.2, 265148.5, 153820.4, 153847.5…
## $ X2014.07.31 <dbl> 390278.8, 151823.6, 164772.6, 265909.9, 154300.8, 154814.5…
## $ X2014.08.31 <dbl> 391787.3, 152847.9, 165756.7, 266362.6, 154537.1, 155612.9…
## $ X2014.09.30 <dbl> 394277.9, 153751.0, 166779.5, 266970.1, 154559.4, 156197.3…
## $ X2014.10.31 <dbl> 397437.2, 154391.3, 167637.9, 267441.2, 154235.8, 156546.9…
## $ X2014.11.30 <dbl> 400977.8, 155200.4, 168710.6, 268088.3, 154033.2, 156877.8…
## $ X2014.12.31 <dbl> 404247.0, 156147.2, 169982.6, 268640.6, 154015.0, 157236.4…
## $ X2015.01.31 <dbl> 407153.5, 157331.9, 171371.4, 269054.2, 154092.7, 157461.6…
## $ X2015.02.28 <dbl> 409610.0, 158559.7, 172588.5, 269298.1, 154143.6, 157481.4…
## $ X2015.03.31 <dbl> 411620.7, 159760.0, 173663.2, 269545.7, 154330.6, 157710.7…
## $ X2015.04.30 <dbl> 413566.8, 160883.3, 174742.5, 269995.1, 154741.3, 158097.1…
## $ X2015.05.31 <dbl> 415767.3, 161958.0, 175965.1, 270802.1, 155321.3, 158831.8…
## $ X2015.06.30 <dbl> 418022.3, 163028.5, 177218.6, 271686.6, 155838.7, 159434.4…
## $ X2015.07.31 <dbl> 419799.2, 164141.5, 178647.9, 272787.7, 156454.6, 160078.9…
## $ X2015.08.31 <dbl> 420990.1, 165302.7, 180161.0, 274108.4, 157162.3, 160654.2…
## $ X2015.09.30 <dbl> 422008.7, 166486.7, 181807.0, 275855.2, 157855.2, 161322.0…
## $ X2015.10.31 <dbl> 423264.3, 167679.7, 183526.9, 277744.4, 158378.1, 162251.2…
## $ X2015.11.30 <dbl> 424312.2, 168730.3, 185185.4, 279288.1, 158642.5, 163135.8…
## $ X2015.12.31 <dbl> 425268.1, 169819.9, 186725.1, 280539.3, 158994.0, 163994.3…
## $ X2016.01.31 <dbl> 428434.8, 170986.7, 188144.4, 281424.5, 159472.1, 164819.1…
## $ X2016.02.29 <dbl> 433762.0, 172203.9, 189489.8, 282116.1, 159962.0, 165708.6…
## $ X2016.03.31 <dbl> 439813.5, 173101.4, 190726.5, 282429.2, 160002.4, 166322.3…
## $ X2016.04.30 <dbl> 444385.8, 173955.4, 192036.4, 282988.6, 159995.3, 166888.0…
## $ X2016.05.31 <dbl> 447418.8, 174779.6, 193332.7, 283846.9, 160188.8, 167440.6…
## $ X2016.06.30 <dbl> 450099.0, 175761.9, 194718.5, 285177.9, 160682.1, 168117.6…
## $ X2016.07.31 <dbl> 451079.2, 176580.9, 195934.9, 286272.2, 161067.6, 168423.3…
## $ X2016.08.31 <dbl> 450770.7, 177377.1, 197195.2, 287055.3, 161454.4, 168620.5…
## $ X2016.09.30 <dbl> 449289.3, 178287.5, 198611.0, 287657.9, 161942.4, 168788.4…
## $ X2016.10.31 <dbl> 448952.6, 179291.5, 200186.0, 288497.0, 162563.4, 169309.2…
## $ X2016.11.30 <dbl> 450021.3, 180421.1, 201627.6, 289793.4, 163131.8, 169963.7…
## $ X2016.12.31 <dbl> 452600.8, 181506.5, 202823.3, 291583.6, 163773.1, 170889.6…
## $ X2017.01.31 <dbl> 457480.7, 182574.4, 203870.9, 293224.8, 164334.1, 171832.4…
## $ X2017.02.28 <dbl> 463902.4, 183527.9, 205043.4, 294957.3, 164831.3, 172864.3…
## $ X2017.03.31 <dbl> 470876.7, 184544.6, 206412.2, 296486.4, 165211.9, 173967.1…
## $ X2017.04.30 <dbl> 476572.4, 185670.1, 207866.5, 298546.8, 165654.0, 174841.8…
## $ X2017.05.31 <dbl> 480748.5, 186683.6, 209140.2, 300206.3, 166117.8, 175482.0…
## $ X2017.06.30 <dbl> 484327.4, 187597.0, 210453.8, 301663.4, 166604.3, 175675.4…
## $ X2017.07.31 <dbl> 486069.8, 188304.0, 211701.8, 302738.2, 167114.3, 175821.5…
## $ X2017.08.31 <dbl> 487575.8, 189053.5, 213110.8, 304068.7, 167673.5, 176009.6…
## $ X2017.09.30 <dbl> 489161.5, 189814.5, 214475.5, 305419.4, 168298.7, 176609.4…
## $ X2017.10.31 <dbl> 492609.7, 190722.2, 215939.0, 307107.3, 169058.9, 177434.1…
## $ X2017.11.30 <dbl> 496433.5, 191721.7, 217445.1, 308476.2, 169960.7, 178458.0…
## $ X2017.12.31 <dbl> 500927.5, 192831.4, 219021.9, 310004.2, 170865.1, 179479.8…
## $ X2018.01.31 <dbl> 506002.1, 193931.8, 220424.3, 311384.4, 171673.4, 180567.6…
## $ X2018.02.28 <dbl> 511598.8, 194933.1, 221656.0, 312490.0, 172379.0, 181514.0…
## $ X2018.03.31 <dbl> 516700.6, 196106.0, 222895.0, 313930.3, 173355.9, 182641.8…
## $ X2018.04.30 <dbl> 520897.0, 197203.4, 224122.9, 315540.7, 174283.9, 183483.8…
## $ X2018.05.31 <dbl> 524736.3, 198347.8, 225396.4, 317883.7, 175021.0, 184137.8…
## $ X2018.06.30 <dbl> 527861.4, 199216.9, 226469.1, 319998.1, 175385.1, 184095.9…
## $ X2018.07.31 <dbl> 530704.6, 200210.4, 227859.7, 322385.7, 175765.3, 184151.3…
## $ X2018.08.31 <dbl> 533754.8, 201104.2, 229218.2, 324714.0, 176202.8, 184082.9…
## $ X2018.09.30 <dbl> 537196.2, 201986.0, 230597.7, 327102.6, 176621.1, 184223.3…
## $ X2018.10.31 <dbl> 540856.9, 202658.6, 231732.9, 328684.8, 176863.6, 184200.7…
## $ X2018.11.30 <dbl> 543445.5, 203421.3, 232938.4, 329810.2, 177136.7, 184354.7…
## $ X2018.12.31 <dbl> 544962.3, 204269.4, 234241.3, 330470.4, 177504.4, 184731.7…
## $ X2019.01.31 <dbl> 544489.2, 205109.6, 235417.0, 331544.2, 178158.2, 185526.7…
## $ X2019.02.28 <dbl> 542862.7, 205850.1, 236472.0, 333056.6, 179141.6, 186691.6…
## $ X2019.03.31 <dbl> 541019.8, 206490.8, 237185.4, 334696.2, 180176.0, 187787.2…
## $ X2019.04.30 <dbl> 539962.0, 207031.8, 237700.5, 336075.6, 181011.5, 188611.3…
## $ X2019.05.31 <dbl> 539328.8, 207518.0, 237977.4, 336863.7, 181560.4, 188978.9…
## $ X2019.06.30 <dbl> 538938.3, 207989.6, 238378.5, 337825.4, 182159.3, 189167.5…
## $ X2019.07.31 <dbl> 539348.7, 208490.1, 238897.1, 338806.0, 182834.7, 189065.1…
## $ X2019.08.31 <dbl> 541064.8, 209089.9, 239604.5, 339979.1, 183543.5, 188964.7…
## $ X2019.09.30 <dbl> 543688.0, 209745.3, 240307.6, 340620.4, 184186.9, 188706.5…
## $ X2019.10.31 <dbl> 546780.2, 210622.7, 241239.5, 341343.4, 184997.6, 188775.9…
## $ X2019.11.30 <dbl> 550147.0, 211661.6, 242514.0, 342787.6, 186029.6, 189094.9…
## $ X2019.12.31 <dbl> 553636.3, 212792.8, 244084.7, 344624.3, 187148.8, 189782.5…
## $ X2020.01.31 <dbl> 556486.2, 213941.2, 245987.8, 346888.5, 188253.8, 190751.2…
## $ X2020.02.29 <dbl> 558778.1, 215052.3, 247780.7, 348614.7, 189412.0, 191879.9…
## $ X2020.03.31 <dbl> 560986.2, 216135.8, 249355.9, 350203.5, 190654.9, 192983.7…
## $ X2020.04.30 <dbl> 563025.1, 216917.0, 250314.0, 351523.1, 192017.8, 193779.7…
## $ X2020.05.31 <dbl> 563282.7, 217195.9, 250578.2, 352671.2, 192844.6, 193882.8…
## $ X2020.06.30 <dbl> 562062.7, 217362.9, 250710.7, 353291.8, 193186.4, 193586.3…
## $ X2020.07.31 <dbl> 561905.2, 218015.5, 251276.7, 353467.6, 193483.7, 193309.7…
## $ X2020.08.31 <dbl> 565186.1, 219433.1, 252836.7, 353870.7, 194478.8, 193796.0…
## $ X2020.09.30 <dbl> 572469.7, 221573.2, 255338.8, 355902.3, 196595.7, 195343.0…
## $ X2020.10.31 <dbl> 581822.3, 223988.6, 258299.5, 359232.3, 199254.8, 197438.0…
## $ X2020.11.30 <dbl> 592712.9, 226743.2, 261775.7, 363633.2, 202436.3, 199914.0…
## $ X2020.12.31 <dbl> 603576.0, 229412.0, 265230.7, 368487.6, 205431.3, 202233.8…
## $ X2021.01.31 <dbl> 612596.5, 232213.7, 268806.7, 373082.8, 208188.7, 204531.8…
## $ X2021.02.28 <dbl> 620314.3, 235198.1, 272369.3, 377878.0, 210880.3, 207040.3…
## $ X2021.03.31 <dbl> 627306.0, 238577.0, 276176.2, 382358.6, 213682.0, 209598.3…
## $ X2021.04.30 <dbl> 636459.0, 242326.8, 280409.8, 386521.6, 216555.3, 212008.4…
## $ X2021.05.31 <dbl> 647419.3, 246618.0, 285533.0, 390683.3, 219441.2, 214175.5…
## $ X2021.06.30 <dbl> 659186.9, 251209.4, 291805.2, 394581.2, 222466.7, 216434.1…
## $ X2021.07.31 <dbl> 669822.8, 255421.6, 298228.1, 398024.8, 224661.5, 218356.4…
## $ X2021.08.31 <dbl> 677181.2, 258632.5, 303942.3, 400124.3, 225513.9, 219476.3…
## $ X2021.09.30 <dbl> 682170.9, 260979.7, 308572.4, 401162.2, 224774.3, 219624.4…
## $ X2021.10.31 <dbl> 686625.5, 263414.6, 313131.1, 402661.6, 224184.2, 219781.2…
## $ X2021.11.30 <dbl> 692498.8, 266567.0, 318230.8, 404915.8, 224289.5, 220462.0…
## $ X2021.12.31 <dbl> 699739.4, 270190.7, 323743.4, 407580.3, 225236.9, 221851.0…
## $ X2022.01.31 <dbl> 709837.1, 274958.1, 330663.5, 410719.1, 227530.8, 224290.9…
## $ X2022.02.28 <dbl> 722794.8, 280290.1, 338712.4, 414524.3, 230895.8, 227666.3…
## $ X2022.03.31 <dbl> 738419.6, 286319.3, 348212.8, 419603.0, 235106.4, 231725.7…
## $ X2022.04.30 <dbl> 752628.5, 292169.1, 358203.3, 424799.4, 238922.8, 235521.7…
## $ X2022.05.31 <dbl> 763338.9, 297545.9, 367778.1, 429542.3, 241871.0, 238074.4…
## $ X2022.06.30 <dbl> 769033.5, 302239.0, 376288.9, 434080.3, 244173.0, 239808.1…
## $ X2022.07.31 <dbl> 769344.7, 305092.6, 382063.6, 437141.2, 245058.4, 240163.5…
## $ X2022.08.31 <dbl> 764650.1, 305988.4, 384986.6, 438058.1, 244851.7, 239698.0…
## $ X2022.09.30 <dbl> 757141.7, 305328.0, 385360.3, 436505.6, 243906.8, 238433.9…
## $ X2022.10.31 <dbl> 750382.4, 304383.7, 384882.7, 434695.1, 243612.8, 237587.6…
## $ X2022.11.30 <dbl> 745070.5, 303387.7, 384183.5, 433155.1, 243808.8, 237114.1…
## $ X2022.12.31 <dbl> 740517.2, 302107.9, 383346.3, 431588.4, 244013.9, 236856.7…
## $ X2023.01.31 <dbl> 733433.3, 300419.2, 382012.6, 429948.6, 243834.9, 236348.6…
## $ X2023.02.28 <dbl> 726150.5, 298813.3, 380883.4, 428586.3, 243450.5, 236127.7…
## $ X2023.03.31 <dbl> 720793.3, 297741.5, 380424.0, 428679.5, 243602.5, 236515.7…
## $ X2023.04.30 <dbl> 720203.2, 297371.6, 380864.7, 430156.3, 244405.4, 237842.6…
## $ X2023.05.31 <dbl> 723208.9, 297691.2, 381907.4, 432573.3, 245913.8, 239675.3…
## $ X2023.06.30 <dbl> 728472.3, 298520.4, 383565.7, 435414.0, 247702.2, 241772.7…
## $ X2023.07.31 <dbl> 735567.6, 299267.1, 385252.1, 438426.2, 249442.2, 243733.8…
## $ X2023.08.31 <dbl> 743596.1, 299846.6, 386923.3, 441487.5, 251109.6, 245662.5…
## $ X2023.09.30 <dbl> 751268.3, 299884.5, 388085.5, 444085.4, 252410.6, 247137.7…
## $ X2023.10.31 <dbl> 757506.6, 299654.4, 389138.5, 445927.0, 253291.4, 248324.4…
## $ X2023.11.30 <dbl> 761970.8, 299166.0, 390061.9, 447465.0, 253820.7, 249164.1…
## $ X2023.12.31 <dbl> 764813.1, 298562.8, 390885.9, 449149.1, 254254.4, 249790.3…
## $ X2024.01.31 <dbl> 765351.8, 298364.6, 391584.8, 450934.0, 254753.3, 250247.7…
## $ X2024.02.29 <dbl> 765196.9, 298623.9, 392305.6, 453138.4, 255570.2, 251267.1…
# Averaging columns horizontally to consolidate values by year
years <- 2000:2023
for (year in years) {
monthly_cols <- grep(paste0("X", year), names(Zillow), value = TRUE)
Zillow[[paste0(year, "_Yearly_Avg")]] <- rowMeans(Zillow[monthly_cols], na.rm = TRUE)
}
# Zillow data formatted
Zillow_new <- Zillow |>
select(RegionName,"2000_Yearly_Avg":"2023_Yearly_Avg") |>
rename('STATE'=RegionName) |>
inner_join(region_state, join_by("STATE"=="STATE")) |> # From Census Population Estimate data
select(REGION,STATE,"2000_Yearly_Avg":"2023_Yearly_Avg")
glimpse(Zillow_new)
## Rows: 51
## Columns: 26
## $ REGION <int> 4, 3, 3, 1, 1, 2, 2, 3, 3, 2, 1, 3, 4, 4, 1, 3, 2, 3…
## $ STATE <chr> "California", "Texas", "Florida", "New York", "Penns…
## $ `2000_Yearly_Avg` <dbl> 200192.97, 111081.16, 108765.99, 155593.02, 98251.04…
## $ `2001_Yearly_Avg` <dbl> 228074.6, 112622.0, 117723.1, 168553.1, 103576.3, 13…
## $ `2002_Yearly_Avg` <dbl> 256343.33, 115117.52, 129745.37, 184107.29, 110087.9…
## $ `2003_Yearly_Avg` <dbl> 298431.00, 118641.07, 144765.18, 203801.40, 119146.8…
## $ `2004_Yearly_Avg` <dbl> 362059.5, 125048.8, 165560.2, 225873.0, 130617.2, 17…
## $ `2005_Yearly_Avg` <dbl> 440712.6, 131195.9, 204214.6, 252093.2, 145509.5, 18…
## $ `2006_Yearly_Avg` <dbl> 485617.2, 135672.3, 247404.2, 274595.3, 159051.1, 20…
## $ `2007_Yearly_Avg` <dbl> 467890.2, 141032.0, 241410.7, 280180.3, 164905.1, 20…
## $ `2008_Yearly_Avg` <dbl> 385701.9, 141057.1, 201008.2, 277266.0, 164354.7, 19…
## $ `2009_Yearly_Avg` <dbl> 312390.7, 137664.9, 159719.0, 263213.8, 158892.8, 17…
## $ `2010_Yearly_Avg` <dbl> 308441.58, 137018.40, 144272.92, 259147.93, 158262.1…
## $ `2011_Yearly_Avg` <dbl> 291638.81, 132746.56, 131080.41, 253026.26, 151903.8…
## $ `2012_Yearly_Avg` <dbl> 286921.13, 133170.64, 130165.97, 249935.16, 149854.5…
## $ `2013_Yearly_Avg` <dbl> 338596.8, 139991.4, 145627.9, 255475.1, 151235.7, 14…
## $ `2014_Yearly_Avg` <dbl> 389004.9, 151088.6, 163788.2, 264871.9, 153628.6, 15…
## $ `2015_Yearly_Avg` <dbl> 417615.3, 163640.2, 178466.9, 273392.1, 156329.6, 16…
## $ `2016_Yearly_Avg` <dbl> 445552.3, 176187.8, 195402.2, 285736.9, 161186.3, 16…
## $ `2017_Yearly_Avg` <dbl> 482223.8, 187753.7, 211206.8, 301908.2, 167143.7, 17…
## $ `2018_Yearly_Avg` <dbl> 528226.4, 199449.1, 227296.0, 321199.6, 175182.7, 18…
## $ `2019_Yearly_Avg` <dbl> 543438.8, 208532.7, 239148.2, 338185.2, 182579.0, 18…
## $ `2020_Yearly_Avg` <dbl> 570191.1, 219647.6, 253290.4, 354815.5, 194837.5, 19…
## $ `2021_Yearly_Avg` <dbl> 659276.7, 251779.1, 295079.0, 393297.9, 219989.5, 21…
## $ `2022_Yearly_Avg` <dbl> 748596.6, 296650.8, 368723.5, 428701.0, 240312.7, 23…
## $ `2023_Yearly_Avg` <dbl> 738915.4, 298911.6, 385000.4, 437658.2, 248603.2, 24…
# Zillow Average Home Prices by Region
allRegions_Zillow <- Zillow_new |>
select(-STATE) |>
group_by(REGION) |>
summarise_all(mean) |>
pivot_longer(cols = "2000_Yearly_Avg":"2023_Yearly_Avg",
names_to = c("Year_Column"),
values_to = "ZillowAvgHomePrice")
allRegions_Zillow$Year <- gsub("(\\d{4})_Yearly_Avg", "\\1", allRegions_Zillow$Year_Column)
allRegions_Zillow <- allRegions_Zillow |> select(-Year_Column)
glimpse(allRegions_Zillow)
## Rows: 96
## Columns: 3
## $ REGION <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ ZillowAvgHomePrice <dbl> 142482.60, 157733.99, 176448.47, 199636.07, 224552.…
## $ Year <chr> "2000", "2001", "2002", "2003", "2004", "2005", "20…
# Convert to CSV
#write.csv(allRegions_Zillow, 'allRegions_Zillow.csv', row.names=FALSE)
Tidying Regional Data from The U.S. Bureau of Economic Analysis (BEA)
# Imported BEA Data
BEA_unclean <- read_csv('~/Desktop/Weylandt Project (Housing)/Raw Data /RegionalData.csv', show_col_types = FALSE)
glimpse(BEA_unclean)
## Rows: 1,071
## Columns: 27
## $ GeoFips <dbl> 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000…
## $ GeoName <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Al…
## $ LineCode <dbl> NA, 1, 2, 3, NA, 4, 5, 6, 7, NA, 8, 9, NA, 10, 11, 12, NA,…
## $ Description <chr> "Real dollar statistics", "Real GDP (millions of chained 2…
## $ `2000` <chr> NA, "(NA)", "(NA)", "(NA)", NA, "(NA)", "108002.9", "95839…
## $ `2001` <chr> NA, "(NA)", "(NA)", "(NA)", NA, "(NA)", "111894.1", "99375…
## $ `2002` <chr> NA, "(NA)", "(NA)", "(NA)", NA, "(NA)", "115007.7", "10390…
## $ `2003` <chr> NA, "(NA)", "(NA)", "(NA)", NA, "(NA)", "120241", "109568.…
## $ `2004` <chr> NA, "(NA)", "(NA)", "(NA)", NA, "(NA)", "128606.3", "11762…
## $ `2005` <chr> NA, "(NA)", "(NA)", "(NA)", NA, "(NA)", "136087.5", "12320…
## $ `2006` <chr> NA, "(NA)", "(NA)", "(NA)", NA, "(NA)", "144719", "130008"…
## $ `2007` <chr> NA, "(NA)", "(NA)", "(NA)", NA, "(NA)", "151942.6", "13620…
## $ `2008` <chr> NA, "(NA)", "199308.2", "164093.5", NA, "(NA)", "157780.3"…
## $ `2009` <chr> NA, "(NA)", "199810.9", "164430.3", NA, "(NA)", "155666.9"…
## $ `2010` <chr> NA, "(NA)", "199934.3", "163000.8", NA, "(NA)", "162068.8"…
## $ `2011` <chr> NA, "(NA)", "201384.9", "163835.5", NA, "(NA)", "167882.2"…
## $ `2012` <chr> NA, "(NA)", "201397.8", "163555", NA, "(NA)", "172101.5", …
## $ `2013` <chr> NA, "(NA)", "201172.1", "166493.3", NA, "(NA)", "173720.7"…
## $ `2014` <chr> NA, "(NA)", "205291.8", "169501.8", NA, "(NA)", "179487.1"…
## $ `2015` <chr> NA, "(NA)", "215147.9", "175050.6", NA, "(NA)", "187474.7"…
## $ `2016` <chr> NA, "(NA)", "214859.9", "177257.9", NA, "(NA)", "190871.8"…
## $ `2017` <dbl> NA, 216615.500, 220235.900, 181495.600, NA, 216615.500, 19…
## $ `2018` <dbl> NA, 220808.800, 231109.900, 190026.500, NA, 226263.800, 20…
## $ `2019` <dbl> NA, 224944.600, 235777.400, 193422.300, NA, 234526.400, 21…
## $ `2020` <dbl> NA, 222081.400, 252611.700, 191489.300, NA, 235118.300, 23…
## $ `2021` <dbl> NA, 231892.600, 264054.000, 206172.000, NA, 257986.500, 25…
## $ `2022` <dbl> NA, 235807.300, 253984.200, 211182.900, NA, 281569.000, 25…
# Disposable Income data extracted from BEA
Disposable_Income <- BEA_unclean |>
filter(LineCode == 6) |>
select(-c('Description', 'LineCode', 'GeoFips')) |>
mutate(across('2000':'2022', as.numeric)) |>
pivot_longer(c('2000':'2022'),
names_to = "Year",
values_to = "Average Disposable Income",
values_drop_na = FALSE) |>
pivot_wider(names_from = "Year", values_from = "Average Disposable Income")
colnames(Disposable_Income)[colnames(Disposable_Income) == "GeoName"] <- "STATE"
glimpse(Disposable_Income)
## Rows: 51
## Columns: 24
## $ STATE <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colo…
## $ `2000` <dbl> 95839.8, 17933.0, 118337.5, 54020.7, 932100.4, 124352.3, 118615…
## $ `2001` <dbl> 99375.0, 18968.7, 124820.6, 57065.8, 988772.1, 132523.8, 125846…
## $ `2002` <dbl> 103906.5, 20293.0, 132597.0, 59393.7, 1040694.4, 136160.2, 1312…
## $ `2003` <dbl> 109568.8, 21450.3, 141828.8, 63652.8, 1098063.6, 140456.1, 1343…
## $ `2004` <dbl> 117627.3, 22543.5, 154311.8, 68042.8, 1164414.7, 145983.9, 1410…
## $ `2005` <dbl> 123206.1, 23868.0, 167738.1, 70833.0, 1209975.6, 153717.8, 1460…
## $ `2006` <dbl> 130008.0, 25276.3, 184940.1, 75056.5, 1295546.9, 164602.5, 1569…
## $ `2007` <dbl> 136204.2, 26972.1, 195625.4, 79532.9, 1346007.1, 174261.5, 1668…
## $ `2008` <dbl> 141827.1, 29693.1, 201198.3, 83120.6, 1376342.8, 183754.7, 1806…
## $ `2009` <dbl> 143231.0, 30530.9, 195955.8, 83435.7, 1378288.7, 180175.7, 1864…
## $ `2010` <dbl> 148862.1, 32703.9, 198815.2, 86387.0, 1431825.6, 184323.2, 1919…
## $ `2011` <dbl> 152835.4, 34789.2, 206128.2, 91406.0, 1504645.4, 197828.9, 1943…
## $ `2012` <dbl> 157273.8, 35945.3, 214865.8, 97557.8, 1590509.7, 208823.5, 1963…
## $ `2013` <dbl> 157600.0, 35184.4, 218160.9, 96883.6, 1587112.7, 217662.1, 1871…
## $ `2014` <dbl> 162978.1, 37075.5, 228628.0, 102362.7, 1679406.9, 235238.8, 195…
## $ `2015` <dbl> 169520.5, 38075.3, 239543.4, 105216.0, 1786764.4, 243622.7, 199…
## $ `2016` <dbl> 172173.6, 37704.1, 250636.5, 107887.7, 1871763.1, 246133.3, 205…
## $ `2017` <dbl> 179134.7, 38525.4, 266666.0, 111671.5, 1961488.7, 264253.1, 210…
## $ `2018` <dbl> 187334.2, 40309.8, 283699.5, 117322.8, 2057748.6, 288001.5, 221…
## $ `2019` <dbl> 195208.0, 41045.3, 302159.1, 119309.6, 2169277.3, 307696.4, 227…
## $ `2020` <dbl> 210513.8, 42025.7, 337272.5, 129589.1, 2368785.6, 330301.7, 236…
## $ `2021` <dbl> 228206.2, 44143.4, 364326.7, 141244.9, 2520230.0, 363127.6, 242…
## $ `2022` <dbl> 229599.5, 45685.6, 377143.3, 142906.9, 2464043.4, 378514.4, 244…
# Consumer Spending data extracted from BEA
Consumer_Spending <- BEA_unclean |>
filter(LineCode == 7) |>
select(-c('Description', 'LineCode', 'GeoFips')) |>
mutate(across('2000':'2022', as.numeric)) |>
pivot_longer(c('2000':'2022'),
names_to = "Year",
values_to = "Personal Consumption Expenditures",
values_drop_na = FALSE) |>
pivot_wider(names_from = "Year", values_from = "Personal Consumption Expenditures")
colnames(Consumer_Spending)[colnames(Consumer_Spending) == "GeoName"] <- "STATE"
glimpse(Consumer_Spending)
## Rows: 51
## Columns: 24
## $ STATE <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colo…
## $ `2000` <dbl> 89748.7, 17107.7, 121418.9, 50566.1, 844374.3, 117037.5, 100901…
## $ `2001` <dbl> 93232.7, 18123.5, 127277.9, 52796.3, 886833.1, 123305.4, 105264…
## $ `2002` <dbl> 97220.1, 19432.2, 133667.9, 54843.4, 924960.4, 126789.8, 109481…
## $ `2003` <dbl> 102106.6, 20557.2, 142860.7, 57788.1, 975445.5, 132387.1, 11506…
## $ `2004` <dbl> 108428.7, 21847.5, 155367.0, 61297.4, 1041764.2, 140464.2, 1220…
## $ `2005` <dbl> 115631.1, 23354.8, 170011.9, 65560.6, 1114921.0, 148825.1, 1294…
## $ `2006` <dbl> 121487.4, 24275.1, 184063.5, 69198.3, 1186034.5, 158054.6, 1361…
## $ `2007` <dbl> 126916.4, 25617.0, 194363.4, 72561.3, 1256410.5, 167441.9, 1417…
## $ `2008` <dbl> 130009.5, 26749.3, 197557.7, 75160.6, 1295036.3, 173274.1, 1459…
## $ `2009` <dbl> 128265.5, 27166.7, 190868.2, 74484.9, 1253678.5, 170721.1, 1429…
## $ `2010` <dbl> 132314.4, 28537.5, 196474.0, 77458.3, 1297976.1, 175992.9, 1467…
## $ `2011` <dbl> 136772.4, 29947.6, 203580.7, 80992.6, 1354341.6, 184841.9, 1509…
## $ `2012` <dbl> 139950.6, 31039.1, 209936.3, 83237.7, 1409173.4, 189763.8, 1548…
## $ `2013` <dbl> 143956.7, 31673.2, 216830.0, 85154.6, 1453927.1, 198951.0, 1585…
## $ `2014` <dbl> 148385.6, 32641.4, 226151.5, 88584.1, 1522216.3, 210765.5, 1644…
## $ `2015` <dbl> 152790.5, 33274.4, 234755.7, 91366.0, 1595724.1, 222464.3, 1686…
## $ `2016` <dbl> 157806.1, 33976.4, 243679.6, 94866.1, 1665899.0, 233801.6, 1716…
## $ `2017` <dbl> 163523.4, 34926.8, 259029.1, 98052.5, 1752123.2, 247983.2, 1770…
## $ `2018` <dbl> 169460.2, 36077.1, 273816.1, 102359.8, 1856117.3, 261807.9, 183…
## $ `2019` <dbl> 176843.5, 36844.1, 286488.5, 104313.8, 1938018.7, 275707.0, 186…
## $ `2020` <dbl> 175481.9, 35006.0, 291542.5, 105433.5, 1885028.5, 273942.3, 184…
## $ `2021` <dbl> 197820.2, 39632.5, 334350.1, 118796.2, 2146539.4, 314541.5, 204…
## $ `2022` <dbl> 215104.6, 43412.8, 368866.6, 128662.4, 2352361.6, 346723.2, 219…
# Disposable Income data by Region
allRegions_DisposableIncome <- Disposable_Income |>
inner_join(region_state, join_by("STATE"=="STATE")) |> # From Census Population Estimate data
select(REGION,'2000':'2022') |>
group_by(REGION) |>
summarise_all(mean) |>
pivot_longer(cols = "2000":"2022",
names_to = c("Year_Column"),
values_to = "Disposable_Income")
allRegions_DisposableIncome$Year <- gsub("POPESTIMATE(\\d{4})", "\\1", allRegions_DisposableIncome$Year_Column)
allRegions_DisposableIncome <- allRegions_DisposableIncome |> select(-Year_Column)
glimpse(allRegions_DisposableIncome)
## Rows: 92
## Columns: 3
## $ REGION <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Disposable_Income <dbl> 177084.0, 184665.9, 192364.1, 198596.9, 208179.7, 21…
## $ Year <chr> "2000", "2001", "2002", "2003", "2004", "2005", "200…
# Consumer Spending data by Region
allRegions_ConsumerSpending <- Consumer_Spending |>
inner_join(region_state, join_by("STATE"=="STATE")) |> # From Census Population Estimate data
select(REGION,'2000':'2022') |>
group_by(REGION) |>
summarise_all(mean) |>
pivot_longer(cols = "2000":"2022",
names_to = c("Year_Column"),
values_to = "Consumer_Spending")
allRegions_ConsumerSpending$Year <- gsub("POPESTIMATE(\\d{4})", "\\1", allRegions_ConsumerSpending$Year_Column)
allRegions_ConsumerSpending <- allRegions_ConsumerSpending |> select(-Year_Column)
glimpse(allRegions_ConsumerSpending)
## Rows: 92
## Columns: 3
## $ REGION <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Consumer_Spending <dbl> 156018.2, 163250.9, 169863.4, 179593.7, 190131.8, 20…
## $ Year <chr> "2000", "2001", "2002", "2003", "2004", "2005", "200…
# Convert to CSV
#write.csv(allRegions_DisposableIncome, 'allRegions_DisposableIncome.csv', row.names=FALSE)
#write.csv(allRegions_ConsumerSpending, 'allRegions_ConsumerSpending.csv', row.names=FALSE)
Tidying Sales of Single-Family Homes data
# Sales of Single-Family Homes - Northeast Region
Northeast_SalesSF <- read_csv('~/Desktop/Weylandt Project (Housing)/Raw Data /New One Family Houses Sold in Northeast Census Region, Thousands, Seasonally Adjusted Annual Rate .csv', show_col_types = FALSE) |>
rename('NorthEast'= HSN1FNE) |>
rename('Year'= DATE)
Northeast_SalesSF$Year <- format(Northeast_SalesSF$Year, "%Y")
Northeast_SalesSF$NorthEast <- round(Northeast_SalesSF$NorthEast * 1000)
Northeast_Sales_new <- Northeast_SalesSF |>
pivot_wider(names_from = Year, values_from = NorthEast) |>
cbind(REGION = '1') |>
select(REGION, '2000':'2023')
glimpse(Northeast_Sales_new)
## Rows: 1
## Columns: 25
## $ REGION <chr> "1"
## $ `2000` <dbl> 71250
## $ `2001` <dbl> 65417
## $ `2002` <dbl> 64750
## $ `2003` <dbl> 80083
## $ `2004` <dbl> 82167
## $ `2005` <dbl> 81417
## $ `2006` <dbl> 63583
## $ `2007` <dbl> 64083
## $ `2008` <dbl> 35333
## $ `2009` <dbl> 31417
## $ `2010` <dbl> 30667
## $ `2011` <dbl> 21167
## $ `2012` <dbl> 28833
## $ `2013` <dbl> 30583
## $ `2014` <dbl> 27750
## $ `2015` <dbl> 24750
## $ `2016` <dbl> 32250
## $ `2017` <dbl> 39833
## $ `2018` <dbl> 32333
## $ `2019` <dbl> 30333
## $ `2020` <dbl> 37667
## $ `2021` <dbl> 35833
## $ `2022` <dbl> 32583
## $ `2023` <dbl> 33583
# Sales of Single-Family Homes - Midwest Region
Midwest_SalesSF <- read_csv('~/Desktop/Weylandt Project (Housing)/Raw Data /New One Family Houses Sold in Midwest Census Region, Thousands, Seasonally Adjusted Annual Rate.csv', show_col_types = FALSE) |>
rename('MidWest'= HSN1FMW) |>
rename('Year'= DATE)
Midwest_SalesSF$Year <- format(Midwest_SalesSF$Year, "%Y")
Midwest_SalesSF$MidWest <- round(Midwest_SalesSF$MidWest * 1000)
Midwest_Sales_new <- Midwest_SalesSF |>
pivot_wider(names_from = Year, values_from = MidWest) |>
cbind(REGION = '2') |>
select(REGION, '2000':'2023')
glimpse(Midwest_Sales_new)
## Rows: 1
## Columns: 25
## $ REGION <chr> "2"
## $ `2000` <dbl> 155833
## $ `2001` <dbl> 163500
## $ `2002` <dbl> 188417
## $ `2003` <dbl> 188750
## $ `2004` <dbl> 211000
## $ `2005` <dbl> 203167
## $ `2006` <dbl> 161333
## $ `2007` <dbl> 117833
## $ `2008` <dbl> 69250
## $ `2009` <dbl> 54167
## $ `2010` <dbl> 44500
## $ `2011` <dbl> 45167
## $ `2012` <dbl> 47250
## $ `2013` <dbl> 60833
## $ `2014` <dbl> 58250
## $ `2015` <dbl> 61167
## $ `2016` <dbl> 69417
## $ `2017` <dbl> 71833
## $ `2018` <dbl> 75167
## $ `2019` <dbl> 72417
## $ `2020` <dbl> 93250
## $ `2021` <dbl> 84250
## $ `2022` <dbl> 65250
## $ `2023` <dbl> 68167
# Sales of Single-Family Homes - South Region
South_SalesSF <- read_csv("~/Desktop/Weylandt Project (Housing)/Raw Data /New One Family Houses Sold in South Census Region, Thousands, Seasonally Adjusted Annual Rate .csv", show_col_types = FALSE) |>
rename('South'= HSN1FS) |>
rename('Year'= DATE)
South_SalesSF$Year <- format(South_SalesSF$Year, "%Y")
South_SalesSF$South <- round(South_SalesSF$South * 1000)
South_Sales_new <- South_SalesSF |>
pivot_wider(names_from = Year, values_from = South) |>
cbind(REGION = '3') |>
select(REGION, '2000':'2023')
glimpse(South_Sales_new)
## Rows: 1
## Columns: 25
## $ REGION <chr> "3"
## $ `2000` <dbl> 407083
## $ `2001` <dbl> 439000
## $ `2002` <dbl> 450417
## $ `2003` <dbl> 513417
## $ `2004` <dbl> 561000
## $ `2005` <dbl> 638167
## $ `2006` <dbl> 558917
## $ `2007` <dbl> 408750
## $ `2008` <dbl> 264750
## $ `2009` <dbl> 201667
## $ `2010` <dbl> 172500
## $ `2011` <dbl> 168167
## $ `2012` <dbl> 195417
## $ `2013` <dbl> 233083
## $ `2014` <dbl> 244583
## $ `2015` <dbl> 286083
## $ `2016` <dbl> 316667
## $ `2017` <dbl> 339583
## $ `2018` <dbl> 347333
## $ `2019` <dbl> 400417
## $ `2020` <dbl> 481750
## $ `2021` <dbl> 452167
## $ `2022` <dbl> 391000
## $ `2023` <dbl> 411333
# Sales of Single-Family Homes - West Region
West_SalesSF <- read_csv('~/Desktop/Weylandt Project (Housing)/Raw Data /New One Family Houses Sold in West Census Region, Thousands, Seasonally Adjusted Annual Rate .csv', show_col_types = FALSE) |>
rename('West'= HSN1FW) |>
rename('Year'= DATE)
West_SalesSF$Year <- format(West_SalesSF$Year, "%Y")
West_SalesSF$West <- round(West_SalesSF$West * 1000)
West_Sales_new <- West_SalesSF |>
pivot_wider(names_from = Year, values_from = West) |>
cbind(REGION = '4') |>
select(REGION, '2000':'2023')
glimpse(West_Sales_new)
## Rows: 1
## Columns: 25
## $ REGION <chr> "4"
## $ `2000` <dbl> 246083
## $ `2001` <dbl> 239333
## $ `2002` <dbl> 272583
## $ `2003` <dbl> 308417
## $ `2004` <dbl> 346667
## $ `2005` <dbl> 356167
## $ `2006` <dbl> 265500
## $ `2007` <dbl> 178000
## $ `2008` <dbl> 112833
## $ `2009` <dbl> 87000
## $ `2010` <dbl> 73583
## $ `2011` <dbl> 71917
## $ `2012` <dbl> 96583
## $ `2013` <dbl> 105333
## $ `2014` <dbl> 109667
## $ `2015` <dbl> 130583
## $ `2016` <dbl> 142167
## $ `2017` <dbl> 163417
## $ `2018` <dbl> 159167
## $ `2019` <dbl> 182000
## $ `2020` <dbl> 220083
## $ `2021` <dbl> 197000
## $ `2022` <dbl> 148083
## $ `2023` <dbl> 153417
# Sales of Single-Family Homes - All Regions
allRegions_SalesSFH <- bind_rows(Northeast_Sales_new,Midwest_Sales_new,South_Sales_new,West_Sales_new) |>
pivot_longer(cols = "2000":"2023",
names_to = c("Year_Column"),
values_to = "SFH_Sales")
allRegions_SalesSFH$Year <- gsub("(\\d{4})_Yearly_Avg", "\\1", allRegions_SalesSFH$Year_Column)
allRegions_SalesSFH <- allRegions_SalesSFH |> select(-Year_Column)
glimpse(allRegions_SalesSFH)
## Rows: 96
## Columns: 3
## $ REGION <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", …
## $ SFH_Sales <dbl> 71250, 65417, 64750, 80083, 82167, 81417, 63583, 64083, 3533…
## $ Year <chr> "2000", "2001", "2002", "2003", "2004", "2005", "2006", "200…
# Convert to CSV
#write.csv(allRegions_SalesSFH, 'allRegions_SalesSFH.csv', row.names=FALSE)
allRegions_PopEstimates <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_PopEstimates.csv", show_col_types = FALSE)
allRegions_Zillow <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_Zillow.csv", show_col_types = FALSE)
# Joining All Regions Population Estimates data and All Regions Zillow Average Home Prices data
Zillow_PopulationEstimates <- inner_join(allRegions_Zillow,allRegions_PopEstimates, join_by(REGION,Year))
glimpse(Zillow_PopulationEstimates)
## Rows: 96
## Columns: 4
## $ REGION <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ ZillowAvgHomePrice <dbl> 142482.60, 157733.99, 176448.47, 199636.07, 224552.…
## $ Year <dbl> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 200…
## $ Population <dbl> 5962922, 5990614, 6015991, 6037161, 6047059, 605013…
# Visualization of "Changes In Home Prices and Population Estimates from 2000-2023"
ggplot(Zillow_PopulationEstimates, aes(x=Population, y=ZillowAvgHomePrice, color=Year)) +
geom_point() +
scale_y_continuous(labels = scales::dollar) +
scale_x_continuous(labels = scales::comma) +
facet_grid(~REGION, labeller = labeller(REGION = c("1" = "Northeast", "2" = "Midwest", "3" = "South", "4" = "West"))) +
theme_bw() +
theme(legend.position = 'bottom', axis.text.x = element_text(angle = 45, hjust = 1)) +
ylab("Zillow Average Home Price") +
xlab("Population Estimates") +
ggtitle("Changes In Home Price And Population Estimates (2000-2023)") +
scale_color_gradient(low="blue", high="red") +
theme(legend.key.width = unit(1.5,'cm'))
# Correlation test between Population Estimates and Zillow Average Home Prices
cor.test(Zillow_PopulationEstimates$ZillowAvgHomePrice,Zillow_PopulationEstimates$Population)
##
## Pearson's product-moment correlation
##
## data: Zillow_PopulationEstimates$ZillowAvgHomePrice and Zillow_PopulationEstimates$Population
## t = 2.1823, df = 94, p-value = 0.03158
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.01998641 0.40236423
## sample estimates:
## cor
## 0.2195924
# All States Population Estimates
All_PopEstimates_longer <- pop_estimate |>
pivot_longer(cols = "POPESTIMATE2000":"POPESTIMATE2023",
names_to = c("YearlyPopEstimate"),
values_to = "Population")
All_PopEstimates_longer$Year <- gsub("POPESTIMATE(\\d{4})", "\\1", All_PopEstimates_longer$YearlyPopEstimate)
All_PopEstimates_longer <- All_PopEstimates_longer |> select(-YearlyPopEstimate)
glimpse(All_PopEstimates_longer)
## Rows: 1,224
## Columns: 4
## $ REGION <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ STATE <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Ala…
## $ Population <int> 4452173, 4467634, 4480089, 4503491, 4530729, 4569805, 46289…
## $ Year <chr> "2000", "2001", "2002", "2003", "2004", "2005", "2006", "20…
# All States Zillow Average Home Prices
All_Zillow_Longer <- Zillow_new |>
pivot_longer(cols = "2000_Yearly_Avg":"2023_Yearly_Avg",
names_to = c("Year_Column"),
values_to = "ZillowAvgHomePrice")
All_Zillow_Longer$Year <- gsub("(\\d{4})_Yearly_Avg", "\\1", All_Zillow_Longer$Year_Column)
All_Zillow_Longer <- All_Zillow_Longer |> select(-Year_Column)
glimpse(All_Zillow_Longer)
## Rows: 1,224
## Columns: 4
## $ REGION <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
## $ STATE <chr> "California", "California", "California", "Californ…
## $ ZillowAvgHomePrice <dbl> 200193.0, 228074.6, 256343.3, 298431.0, 362059.5, 4…
## $ Year <chr> "2000", "2001", "2002", "2003", "2004", "2005", "20…
# Region 3 - South
All_South_Population <- All_PopEstimates_longer |> filter(REGION == 3)
# South Population Visual
ggplot(All_South_Population, aes(x=Year,y=Population)) +
geom_point(aes(color=STATE)) +
scale_y_continuous(labels = scales::comma) +
scale_x_discrete(labels = NULL) +
facet_wrap(~STATE) +
theme(legend.position = 'none') +
ggtitle("Changes In South Population Estimates (2000-2023)")
# South Population Percent Change
All_SouthPop_PerChg_2000to2023 <- All_South_Population |>
filter(Year %in% c(2000,2023)) |>
group_by(STATE) |>
summarise(Percent_Change = (Population[Year == 2023] / Population[Year == 2000] - 1) * 100)
All_SouthPop_PerChg_2000to2023
## # A tibble: 17 × 2
## STATE Percent_Change
## <chr> <dbl>
## 1 Alabama 14.7
## 2 Arkansas 14.5
## 3 Delaware 31.2
## 4 District of Columbia 18.7
## 5 Florida 40.9
## 6 Georgia 34.1
## 7 Kentucky 11.8
## 8 Louisiana 2.28
## 9 Maryland 16.4
## 10 Mississippi 3.21
## 11 North Carolina 34.1
## 12 Oklahoma 17.4
## 13 South Carolina 33.5
## 14 Tennessee 24.9
## 15 Texas 45.6
## 16 Virginia 22.7
## 17 West Virginia -2.04
All_South_Zillow <- All_Zillow_Longer |> filter(REGION == 3)
# South Home Price Visual
ggplot(All_South_Zillow, aes(x=Year,y=ZillowAvgHomePrice)) +
geom_point(aes(color=STATE)) +
scale_y_continuous(labels = scales::dollar) +
scale_x_discrete(labels = NULL) +
facet_wrap(~STATE) +
theme(legend.position = 'none') +
ylab("Zillow Average Home Price") +
ggtitle("Changes In South Home Values (2000-2023)")
# South Home Price Percent Change
All_SouthZillow_PerChg_2000to2023 <- All_South_Zillow |>
filter(Year %in% c(2000,2023)) |>
group_by(STATE) |>
summarise(Percent_Change = (ZillowAvgHomePrice[Year == 2023] / ZillowAvgHomePrice[Year == 2000] - 1) * 100)
All_SouthZillow_PerChg_2000to2023
## # A tibble: 17 × 2
## STATE Percent_Change
## <chr> <dbl>
## 1 Alabama 122.
## 2 Arkansas 128.
## 3 Delaware 147.
## 4 District of Columbia 267.
## 5 Florida 254.
## 6 Georgia 150.
## 7 Kentucky 129.
## 8 Louisiana 93.2
## 9 Maryland 162.
## 10 Mississippi 102.
## 11 North Carolina 144.
## 12 Oklahoma 139.
## 13 South Carolina 149.
## 14 Tennessee 177.
## 15 Texas 169.
## 16 Virginia 192.
## 17 West Virginia 111.
# Region 4 - West
All_West_Population <- All_PopEstimates_longer |> filter(REGION == 4)
# West Population Visual
ggplot(All_West_Population, aes(x=Year,y=Population)) +
geom_point(aes(color=STATE)) +
scale_y_continuous(labels = scales::comma) +
scale_x_discrete(labels = NULL) +
facet_wrap(~STATE) +
theme(legend.position = 'none') +
ggtitle("Changes In West Population Estimates (2000-2023)")
# West Population Percent Change
All_WestPop_PerChg_2000to2023 <- All_West_Population |>
filter(Year %in% c(2000,2023)) |>
group_by(STATE) |>
summarise(Percent_Change = (Population[Year == 2023] / Population[Year == 2000] - 1) * 100)
All_WestPop_PerChg_2000to2023
## # A tibble: 13 × 2
## STATE Percent_Change
## <chr> <dbl>
## 1 Alaska 16.8
## 2 Arizona 44.0
## 3 California 14.6
## 4 Colorado 35.8
## 5 Hawaii 18.3
## 6 Idaho 51.2
## 7 Montana 25.3
## 8 Nevada 58.2
## 9 New Mexico 16.1
## 10 Oregon 23.4
## 11 Utah 52.3
## 12 Washington 32.2
## 13 Wyoming 18.2
All_West_Zillow <- All_Zillow_Longer |> filter(REGION == 4)
# West Home Price Visual
ggplot(All_West_Zillow, aes(x=Year,y=ZillowAvgHomePrice)) +
geom_point(aes(color=STATE)) +
scale_y_continuous(labels = scales::dollar) +
scale_x_discrete(labels = NULL) +
facet_wrap(~STATE) +
theme(legend.position = 'none') +
ylab("Zillow Average Home Price") +
ggtitle("Changes In West Home Values (2000-2023)")
# West Home Price Percent Change
All_WestZillow_PerChg_2000to2023 <- All_West_Zillow |>
filter(Year %in% c(2000,2023)) |>
group_by(STATE) |>
summarise(Percent_Change = (ZillowAvgHomePrice[Year == 2023] / ZillowAvgHomePrice[Year == 2000] - 1) * 100)
All_WestZillow_PerChg_2000to2023
## # A tibble: 13 × 2
## STATE Percent_Change
## <chr> <dbl>
## 1 Alaska 162.
## 2 Arizona 205.
## 3 California 269.
## 4 Colorado 192.
## 5 Hawaii 319.
## 6 Idaho 261.
## 7 Montana Inf
## 8 Nevada 168.
## 9 New Mexico Inf
## 10 Oregon 217.
## 11 Utah 195.
## 12 Washington 219.
## 13 Wyoming Inf
allRegions_SalesSFH <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_SalesSFH.csv", show_col_types = FALSE)
# Visualization of Sales Of Single-Family Homes
ggplot(allRegions_SalesSFH, aes(x=Year,y=SFH_Sales,color=REGION)) +
geom_line() +
scale_y_continuous(labels = scales::comma) +
facet_grid(~REGION, labeller = labeller(REGION = c("1" = "Northeast", "2" = "Midwest", "3" = "South", "4" = "West"))) +
ylab("Single-Family House Sales") +
xlab("Years") +
ggtitle("Changes In Single-Family House Sales (2000-2023)") +
theme_gray() +
theme(legend.position = 'none')
allRegions_Zillow <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_Zillow.csv", show_col_types = FALSE)
allRegions_DisposableIncome <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_DisposableIncome.csv", show_col_types = FALSE)
# Joining All Regions Zillow Average Home Price data and All Regions Disposable Personal Income data
Zillow_DisposableIncome=inner_join(allRegions_Zillow,allRegions_DisposableIncome,join_by('REGION','Year'))
glimpse(Zillow_DisposableIncome)
## Rows: 92
## Columns: 4
## $ REGION <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ ZillowAvgHomePrice <dbl> 142482.60, 157733.99, 176448.47, 199636.07, 224552.…
## $ Year <dbl> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 200…
## $ Disposable_Income <dbl> 177084.0, 184665.9, 192364.1, 198596.9, 208179.7, 2…
# Visualization of Changes In Home Price And Disposable Income
ggplot(Zillow_DisposableIncome,aes(x=Disposable_Income,y=ZillowAvgHomePrice,color=Year))+
geom_point() +
scale_y_continuous(labels = scales::dollar) +
scale_x_continuous(labels = scales::dollar) +
facet_grid(~REGION, labeller = labeller(REGION = c("1" = "Northeast", "2" = "Midwest", "3" = "South", "4" = "West"))) +
theme_bw() +
theme(legend.position = 'bottom', axis.text.x = element_text(angle = 45, hjust = 1)) +
ylab("Zillow Average Home Price") +
xlab("Disposable Income") +
ggtitle("Changes In Home Price And Disposable Income (2000-2022)") +
scale_color_gradient(low="blue", high="red") +
theme(legend.key.width = unit(1.5,'cm'))
# Correlation test between Disposable Income and Zillow Average Home Prices
cor.test(Zillow_DisposableIncome$Disposable_Income,Zillow_DisposableIncome$ZillowAvgHomePrice)
##
## Pearson's product-moment correlation
##
## data: Zillow_DisposableIncome$Disposable_Income and Zillow_DisposableIncome$ZillowAvgHomePrice
## t = 9.7777, df = 90, p-value = 8.275e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6012713 0.8042912
## sample estimates:
## cor
## 0.717703
# Permutation test to further test the correlation
# Null Distribution - intended to have no relationship by design
replicate(5000, {
Zillow_DisposableIncome |>
mutate(ZillowAvgHomePrice = sample(ZillowAvgHomePrice)) |>
summarize(corr=cor(ZillowAvgHomePrice,Disposable_Income)) |>
pull(corr)
}) -> T_corr
hist(T_corr)
# Observed (Same number from the correlation test above)
Zillow_DisposableIncome |>
#mutate(ZillowAvgHomePrice = sample(ZillowAvgHomePrice)) |>
summarize(corr=cor(ZillowAvgHomePrice,Disposable_Income)) |>
pull(corr)
## [1] 0.717703
# When the observed is very inconsistent with the null distribution, we can reject the null distribution. The observed data is so unlikely to have occurred if the null were true.
allRegions_Zillow <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_Zillow.csv", show_col_types = FALSE)
allRegions_ConsumerSpending <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_ConsumerSpending.csv", show_col_types = FALSE)
# Joining All Regions Zillow Average Home Price data and All Regions Consumer Spending data
Zillow_ConsumerSpending <- inner_join(allRegions_Zillow,allRegions_ConsumerSpending, join_by('REGION','Year'))
glimpse(Zillow_ConsumerSpending)
## Rows: 92
## Columns: 4
## $ REGION <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ ZillowAvgHomePrice <dbl> 142482.60, 157733.99, 176448.47, 199636.07, 224552.…
## $ Year <dbl> 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 200…
## $ Consumer_Spending <dbl> 156018.2, 163250.9, 169863.4, 179593.7, 190131.8, 2…
# Visualization of Changes In Home Price And Consumer Spending
ggplot(Zillow_ConsumerSpending, aes(y=ZillowAvgHomePrice,x=Consumer_Spending,color=Year)) +
geom_point() +
scale_y_continuous(labels = scales::dollar) +
scale_x_continuous(labels = scales::dollar) +
facet_grid(~REGION, labeller = labeller(REGION = c("1" = "Northeast", "2" = "Midwest", "3" = "South", "4" = "West"))) +
theme_bw() +
theme(legend.position = 'bottom', axis.text.x = element_text(angle = 45, hjust = 1)) +
ylab("Zillow Average Home Price") +
xlab("Consumer Spending") +
ggtitle("Changes In Home Price And Consumer Spending (2000-2022)") +
scale_color_gradient(low="blue", high="red") +
theme(legend.key.width = unit(1.5,'cm'))
# Correlation test between Consumer Spending and Zillow Average Home Prices
cor.test(Zillow_ConsumerSpending$ZillowAvgHomePrice,Zillow_ConsumerSpending$Consumer_Spending)
##
## Pearson's product-moment correlation
##
## data: Zillow_ConsumerSpending$ZillowAvgHomePrice and Zillow_ConsumerSpending$Consumer_Spending
## t = 10.106, df = 90, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6163133 0.8125708
## sample estimates:
## cor
## 0.7290958
# Permutation test to further test the correlation
# Null Distribution - intended to have no relationship by design
replicate(5000, {
Zillow_ConsumerSpending |>
mutate(ZillowAvgHomePrice = sample(ZillowAvgHomePrice)) |>
summarize(corr=cor(ZillowAvgHomePrice,Consumer_Spending)) |>
pull(corr)
}) -> T_corr
hist(T_corr)
# Observed (Same number from the correlation test above)
Zillow_ConsumerSpending |>
#mutate(ZillowAvgHomePrice = sample(ZillowAvgHomePrice)) |>
summarize(corr=cor(ZillowAvgHomePrice,Consumer_Spending)) |>
pull(corr)
## [1] 0.7290958
# When the observed is very inconsistent with the null distribution, we can reject the null distribution. The observed data is so unlikely to have occurred if the null were true.
consumerSpending <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_ConsumerSpending.csv", show_col_types = FALSE)
disposableIncome <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_DisposableIncome.csv", show_col_types = FALSE)
popEstimates <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_PopEstimates.csv", show_col_types = FALSE)
salesSFH <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_SalesSFH.csv", show_col_types = FALSE)
zillowData <- read_csv("~/Desktop/Weylandt Project (Housing)/Project csv files/allRegions_Zillow.csv", show_col_types = FALSE)
mortgage15 <- read_csv("~/Desktop/Weylandt Project (Housing)/Raw Data /MORTGAGE15US.csv", show_col_types = FALSE)
mortgage30 <- read_csv("~/Desktop/Weylandt Project (Housing)/Raw Data /MORTGAGE30US.csv", show_col_types = FALSE)
unemployment<- read_csv("~/Desktop/Weylandt Project (Housing)/Raw Data /UNRATE.csv", show_col_types = FALSE)
# Merging all data-sets into one comprehensive data-frame, with emphasis on years 2005 to 2013
economic_data <- reduce(list(consumerSpending, disposableIncome, popEstimates, salesSFH, zillowData), full_join, by = c("REGION", "Year"))
economic_data_crisis <- filter(economic_data, Year >= 2005 & Year <= 2013)
# Apply region mapping
region_mapping <- c('1' = 'Northeast', '2' = 'Midwest', '3' = 'South', '4' = 'West')
economic_data_crisis$REGION <- factor(economic_data_crisis$REGION, levels = names(region_mapping), labels = region_mapping)
glimpse(economic_data_crisis)
## Rows: 36
## Columns: 7
## $ REGION <fct> Northeast, Northeast, Northeast, Northeast, Northea…
## $ Consumer_Spending <dbl> 201264.7, 211507.9, 220690.2, 227822.0, 225554.8, 2…
## $ Year <dbl> 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 201…
## $ Disposable_Income <dbl> 213357.4, 226112.3, 238219.6, 247891.2, 252763.9, 2…
## $ Population <dbl> 6050137, 6058073, 6072596, 6097325, 6125900, 615122…
## $ SFH_Sales <dbl> 81417, 63583, 64083, 35333, 31417, 30667, 21167, 28…
## $ ZillowAvgHomePrice <dbl> 251367.6, 267143.3, 265116.4, 254258.0, 236936.7, 2…
# Home Price & Population Trends
# Create separate data frames for home prices and population
home_prices <- economic_data_crisis[c("REGION", "Year", "ZillowAvgHomePrice")]
population <- economic_data_crisis[c("REGION", "Year", "Population")]
# Plot home price trends during 2008 crisis
home_price_plot <- ggplot(home_prices, aes(x = Year, y = ZillowAvgHomePrice, color = factor(REGION))) +
geom_line() +
labs(title = "Home Price Trends (2005-2013)", y = "Home Price", color = "Region") +
theme_minimal() +
scale_y_continuous(labels = dollar)
home_price_plot
# Home price percent change 2008 crisis
home_price_percentChange <- home_prices|>
group_by(REGION) |>
mutate(Previous_ZillowAvgHomePrice = lag(ZillowAvgHomePrice, 1)) |>
mutate(Percent_Change=((ZillowAvgHomePrice-Previous_ZillowAvgHomePrice)/Previous_ZillowAvgHomePrice)*100) |>
select(-ZillowAvgHomePrice,-Previous_ZillowAvgHomePrice)
print(home_price_percentChange, n=36)
## # A tibble: 36 × 3
## # Groups: REGION [4]
## REGION Year Percent_Change
## <fct> <dbl> <dbl>
## 1 Northeast 2005 NA
## 2 Northeast 2006 6.28
## 3 Northeast 2007 -0.759
## 4 Northeast 2008 -4.10
## 5 Northeast 2009 -6.81
## 6 Northeast 2010 -2.01
## 7 Northeast 2011 -3.63
## 8 Northeast 2012 -2.30
## 9 Northeast 2013 2.50
## 10 Midwest 2005 NA
## 11 Midwest 2006 3.69
## 12 Midwest 2007 0.706
## 13 Midwest 2008 -2.84
## 14 Midwest 2009 4.64
## 15 Midwest 2010 -2.15
## 16 Midwest 2011 -4.22
## 17 Midwest 2012 -0.846
## 18 Midwest 2013 6.27
## 19 South 2005 NA
## 20 South 2006 9.90
## 21 South 2007 2.25
## 22 South 2008 -3.73
## 23 South 2009 -6.17
## 24 South 2010 -2.84
## 25 South 2011 -4.01
## 26 South 2012 -0.490
## 27 South 2013 4.97
## 28 West 2005 NA
## 29 West 2006 13.9
## 30 West 2007 2.95
## 31 West 2008 -6.12
## 32 West 2009 -10.7
## 33 West 2010 -4.35
## 34 West 2011 -5.20
## 35 West 2012 -0.0808
## 36 West 2013 9.16
# Plot population trends during 2008 crisis
population_plot <- ggplot(population, aes(x = Year, y = Population, color = factor(REGION))) +
geom_line() +
labs(title = "Population Trends (2005-2013)", y = "Population", color = "Region") +
theme_minimal() +
scale_y_continuous(labels = comma)
population_plot
# Population percent change 2008 crisis
population_percentChange <- population |>
group_by(REGION) |>
mutate(Previous_pop = lag(Population, 1)) |>
mutate(Percent_Change=((Population-Previous_pop)/Previous_pop)*100) |>
select(-Population,-Previous_pop)
print(population_percentChange, n=36)
## # A tibble: 36 × 3
## # Groups: REGION [4]
## REGION Year Percent_Change
## <fct> <dbl> <dbl>
## 1 Northeast 2005 NA
## 2 Northeast 2006 0.131
## 3 Northeast 2007 0.240
## 4 Northeast 2008 0.407
## 5 Northeast 2009 0.469
## 6 Northeast 2010 0.413
## 7 Northeast 2011 0.439
## 8 Northeast 2012 0.308
## 9 Northeast 2013 0.227
## 10 Midwest 2005 NA
## 11 Midwest 2006 0.421
## 12 Midwest 2007 0.402
## 13 Midwest 2008 0.347
## 14 Midwest 2009 0.337
## 15 Midwest 2010 0.341
## 16 Midwest 2011 0.272
## 17 Midwest 2012 0.266
## 18 Midwest 2013 0.332
## 19 South 2005 NA
## 20 South 2006 1.49
## 21 South 2007 1.48
## 22 South 2008 1.35
## 23 South 2009 1.22
## 24 South 2010 1.16
## 25 South 2011 0.993
## 26 South 2012 1.06
## 27 South 2013 0.958
## 28 West 2005 NA
## 29 West 2006 1.35
## 30 West 2007 1.23
## 31 West 2008 1.31
## 32 West 2009 1.18
## 33 West 2010 1.13
## 34 West 2011 0.889
## 35 West 2012 0.947
## 36 West 2013 0.938
# Arrange plots side by side
multi_panel_plot <- plot_grid(home_price_plot, population_plot, ncol = 2)
multi_panel_plot
# Economic Indicators Over Time (2005-2013)
ggplot(economic_data_crisis, aes(x = Year)) +
geom_line(aes(y = Consumer_Spending, color = "Consumer Spending (USD)")) +
geom_line(aes(y = Disposable_Income, color = "Disposable Income (USD)")) +
geom_line(aes(y = ZillowAvgHomePrice, color = "Home Prices (USD)")) +
facet_wrap(~REGION) +
labs(title = "Economic Indicators Over Time (2005-2013)", y = "Value") +
scale_color_manual(values = c("Consumer Spending (USD)" = "blue",
"Disposable Income (USD)" = "green",
"Home Prices (USD)" = "red")) +
theme_minimal() +
guides(color = guide_legend(title = "Indicators")) +
scale_y_continuous(labels = comma)
# Mortgage rates from 2000-2010
# Extracting year from the DATE column for merging
mortgage15$Year <- format(as.Date(mortgage15$DATE), "%Y")
mortgage30$Year <- format(as.Date(mortgage30$DATE), "%Y")
# Convert Year to numeric for consistency
mortgage15$Year <- as.numeric(mortgage15$Year)
mortgage30$Year <- as.numeric(mortgage30$Year)
# Aggregating average yearly mortgage rates before merging
avg_mortgage15 <- mortgage15 %>%
group_by(Year) %>%
summarise(AvgMortgage15Rate = mean(MORTGAGE15US))
avg_mortgage30 <- mortgage30 %>%
group_by(Year) %>%
summarise(AvgMortgage30Rate = mean(MORTGAGE30US))
# Merging mortgage rate data with main economic data
economic_data_crisis <- economic_data_crisis %>%
left_join(avg_mortgage15, by = "Year") %>%
left_join(avg_mortgage30, by = "Year")
# Filtering mortgage data from 2000 to 2010
mortgage15_filtered <- filter(mortgage15, Year >= 2000 & Year <= 2010)
mortgage30_filtered <- filter(mortgage30, Year >= 2000 & Year <= 2010)
# Plotting the mortgage rate trends from 2000 to 2010
mortgage_rate_plot <- ggplot() +
geom_line(data = mortgage15_filtered, aes(x = DATE, y = MORTGAGE15US, color = "15-Year Mortgage Rate"), linewidth = 1) +
geom_line(data = mortgage30_filtered, aes(x = DATE, y = MORTGAGE30US, color = "30-Year Mortgage Rate"), linewidth = 1) +
labs(title = "Mortgage Rates (2000-2010)",
x = "Year",
y = "Mortgage Rate (%)",
color = "Mortgage Rate Type") +
scale_color_manual(values = c("15-Year Mortgage Rate" = "steelblue", "30-Year Mortgage Rate" = "darkorange")) +
theme_minimal() +
scale_x_date(date_breaks = "1 year", date_labels = "%Y")
mortgage_rate_plot
# Unemployment visualization
ggplot(data = unemployment, aes(x = DATE, y = UNRATE)) +
geom_line() + # This adds a line graph
labs(title = "Unemployment Rate Over Time",
x = "Date",
y = "Unemployment Rate (%)") +
theme_minimal()
# Focus on the years from 2019 to 2022
Covid_crisis <- filter(economic_data, Year >= 2019 & Year <= 2022)
# Apply region mapping
region_mapping <- c('1' = 'Northeast', '2' = 'Midwest', '3' = 'South', '4' = 'West')
Covid_crisis$REGION <- factor(Covid_crisis$REGION, levels = names(region_mapping), labels = region_mapping)
glimpse(Covid_crisis)
## Rows: 16
## Columns: 7
## $ REGION <fct> Northeast, Northeast, Northeast, Northeast, Midwest…
## $ Consumer_Spending <dbl> 315365.2, 305907.9, 340473.0, 369889.4, 239730.7, 2…
## $ Year <dbl> 2019, 2020, 2021, 2022, 2019, 2020, 2021, 2022, 201…
## $ Disposable_Income <dbl> 354637.6, 378656.2, 397053.7, 388295.1, 265311.9, 2…
## $ Population <dbl> 6220311, 6381164, 6360380, 6336316, 5694084, 574748…
## $ SFH_Sales <dbl> 30333, 37667, 35833, 32583, 72417, 93250, 84250, 65…
## $ ZillowAvgHomePrice <dbl> 286114.9, 303382.1, 349039.7, 389775.3, 179952.1, 1…
# Population trends during Covid
population_covid <- Covid_crisis[c("REGION", "Year", "Population")]
# Plot population trends Covid
population_plot_covid <- ggplot(population_covid, aes(x = Year, y = Population, color = factor(REGION))) +
geom_line() +
labs(title = "Population Trends (COVID)", y = "Population", color = "Region") +
theme_minimal() +
scale_y_continuous(labels = comma)
population_plot_covid
# Population Percent Change during Covid
population_percentChange_covid <- population_covid |>
group_by(REGION) |>
mutate(Previous_pop = lag(Population, 1)) |>
mutate(Percent_Change=((Population-Previous_pop)/Previous_pop)*100) |>
select(-Population,-Previous_pop)
population_percentChange_covid
## # A tibble: 16 × 3
## # Groups: REGION [4]
## REGION Year Percent_Change
## <fct> <dbl> <dbl>
## 1 Northeast 2019 NA
## 2 Northeast 2020 2.59
## 3 Northeast 2021 -0.326
## 4 Northeast 2022 -0.378
## 5 Midwest 2019 NA
## 6 Midwest 2020 0.938
## 7 Midwest 2021 -0.173
## 8 Midwest 2022 -0.0976
## 9 South 2019 NA
## 10 South 2020 0.705
## 11 South 2021 0.702
## 12 South 2022 1.06
## 13 West 2019 NA
## 14 West 2020 0.401
## 15 West 2021 -0.0755
## 16 West 2022 0.200
# US shape file on R
us_map <- us_map(regions = "states") |> select(full,geom)
# Merging All States Zillow data with US shape file
Zillow_2019_2023 <- All_Zillow_Longer |>
group_by(STATE) |>
filter(Year %in% c(2019:2023)) |>
inner_join(us_map, join_by('STATE'=='full'))
# Animation of Changes in Zillow Average Home Prices during Covid
ggplot(Zillow_2019_2023, aes(fill=ZillowAvgHomePrice, geometry = geom)) +
geom_sf() +
transition_time(as.integer(Year)) +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "left")+
scale_fill_continuous() +
scale_x_log10() +
scale_y_log10() +
scale_fill_distiller(type="div",
palette="RdGy",
name="Zillow Price Scale",
direction=-1,
labels=scales::dollar_format()) +
guides(color="none") +
labs(fill = "Zillow Price Scale",
caption = "Source: Zillow Home Value Index: zillow.com/data/") +
ggtitle("{frame_time} Zillow Average Home Price Change")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
# Population Pyramid of Single-Family Houses Sales
ggplot(allRegions_SalesSFH, aes(x=Year, y=SFH_Sales, fill=REGION)) +
geom_bar(stat = "identity") +
coord_flip() +
facet_wrap(~REGION, scales = "free_y", labeller = labeller(REGION = c("1" = "Northeast", "2" = "Midwest", "3" = "South", "4" = "West"))) +
scale_y_continuous(labels = scales::comma) +
labs(title = "Change in Sales of Single-Family Homes: 2000 - 2023", x = "Year", y = "Number of Sales") +
theme_bw()+
theme(legend.position = "none") +
scale_color_date()
# Trend analysis over Covid years
ggplot(Covid_crisis, aes(x = Year)) +
geom_line(aes(y = Consumer_Spending, color = "Consumer Spending (USD)")) +
geom_line(aes(y = Disposable_Income, color = "Disposable Income (USD)")) +
geom_line(aes(y = ZillowAvgHomePrice, color = "Home Prices (USD)")) +
facet_wrap(~REGION) +
labs(title = "Economic Indicators Over Time (COVID)", y = "Value") +
scale_color_manual(values = c("Consumer Spending (USD)" = "blue",
"Disposable Income (USD)" = "green",
"Home Prices (USD)" = "red")) +
theme_minimal() +
guides(color = guide_legend(title = "Indicators")) +
scale_y_continuous(labels = comma)